ixl/ice(4): Avoid unnecessary defrag of TSO packets
> Date: Fri, 12 Sep 2025 16:00:39 +0200
> From: Jan Klemkow <jan@openbsd.org>
>
> Hi,
>
> ixl(4) and ice(4) network cards can handle only up to 8 DMA segments
> for a regular packet, but up to 128 segments for a TSO packet. TSO
> packets exceed the 8-segment limit very quickly, so we run into
> unnecessary m_defrag() calls, which cost throughput.
>
> This diff raises the limit to 128 segments for ice(4) and ixl(4) and
> adds an additional mbuf-chain length check for non-TSO packets.
>
> ok?
I don't think so. This bypasses all the bus_dma(9) logic. There
isn't necessarily a relationship between the number of elements in an
mbuf chain and the number of bus_dma(9) segments; a single mbuf
cluster can cross page boundaries and load as several segments, for
example.
I think you should use separate bus_dma(9) maps for TSO.
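For ixl(4) that could look roughly like the sketch below: keep the
existing small map, create a second, larger map per tx slot, and pick
one at load time. The txm_tso_map field and the changed
ixl_load_mbuf() signature are hypothetical, just to illustrate the
idea; the limits and flags are taken from the diff.

	/* At ring allocation, create both maps per tx slot. */
	if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
	    IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
	    &txm->txm_map) != 0)
		goto uncreate;
	if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
	    IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
	    &txm->txm_tso_map) != 0)
		goto uncreate;

	static inline int
	ixl_load_mbuf(bus_dma_tag_t dmat, struct ixl_tx_map *txm,
	    struct mbuf *m)
	{
		bus_dmamap_t map;
		int error;

		/* TSO packets get the map with the larger segment count. */
		map = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
		    txm->txm_tso_map : txm->txm_map;

		error = bus_dmamap_load_mbuf(dmat, map, m,
		    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
		if (error != EFBIG)
			return (error);

		error = m_defrag(m, M_DONTWAIT);
		if (error != 0)
			return (error);

		return (bus_dmamap_load_mbuf(dmat, map, m,
		    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
	}

That way bus_dma(9) keeps doing the segment accounting: a non-TSO
packet that still loads in more than IXL_TX_PKT_DESCS segments gets
EFBIG and falls back to m_defrag() as before, while TSO packets can
use the larger map without defragmenting. The caller would also have
to remember which map was used, for the later sync and unload.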
> Index: dev/pci/if_ice.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ice.c,v
> diff -u -p -r1.58 if_ice.c
> --- dev/pci/if_ice.c 19 Aug 2025 11:46:52 -0000 1.58
> +++ dev/pci/if_ice.c 12 Sep 2025 13:17:23 -0000
> @@ -13868,7 +13868,7 @@ ice_tso_detect_sparse(struct mbuf *m, st
> hlen -= seglen;
> }
>
> - maxsegs = ICE_MAX_TX_SEGS - hdrs;
> + maxsegs = ICE_MAX_TSO_SEGS - hdrs;
>
> /* We must count the headers, in order to verify that they take up
> * 3 or fewer descriptors. However, we don't need to check the data
> @@ -13957,13 +13957,23 @@ ice_tx_setup_offload(struct mbuf *m0, st
> static inline int
> ice_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
> {
> + struct mbuf *n;
> + int segs = 0;
> int error;
>
> +	/* Non-TSO packets are limited to at most ICE_MAX_TX_SEGS segments. */
> + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
> + for (n = m; n != NULL; n = n->m_next)
> + if (++segs > ICE_MAX_TX_SEGS)
> + goto defrag;
> + }
> +
> error = bus_dmamap_load_mbuf(dmat, map, m,
> BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
> if (error != EFBIG)
> return (error);
>
> + defrag:
> error = m_defrag(m, M_DONTWAIT);
> if (error != 0)
> return (error);
> @@ -29671,7 +29681,7 @@ ice_tx_queues_alloc(struct ice_softc *sc
> for (j = 0; j < sc->isc_ntxd[i]; j++) {
> map = &txq->tx_map[j];
> if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
> - ICE_MAX_TX_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
> + ICE_MAX_TSO_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
> BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> &map->txm_map) != 0) {
> printf("%s: could not allocate Tx DMA map\n",
> Index: dev/pci/if_ixl.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
> diff -u -p -r1.108 if_ixl.c
> --- dev/pci/if_ixl.c 24 Jun 2025 11:03:10 -0000 1.108
> +++ dev/pci/if_ixl.c 12 Sep 2025 13:17:23 -0000
> @@ -900,6 +900,7 @@ struct ixl_rx_wb_desc_32 {
> } __packed __aligned(16);
>
> #define IXL_TX_PKT_DESCS 8
> +#define IXL_TX_TSO_PKT_DESCS 128
> #define IXL_TX_QUEUE_ALIGN 128
> #define IXL_RX_QUEUE_ALIGN 128
>
> @@ -2579,7 +2580,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
> txm = &maps[i];
>
> if (bus_dmamap_create(sc->sc_dmat,
> - MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
> + MAXMCLBYTES, IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
> BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> &txm->txm_map) != 0)
> goto uncreate;
> @@ -2749,13 +2750,23 @@ ixl_txr_free(struct ixl_softc *sc, struc
> static inline int
> ixl_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
> {
> + struct mbuf *n;
> + int segs = 0;
> int error;
>
> +	/* Non-TSO packets are limited to at most IXL_TX_PKT_DESCS segments. */
> + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
> + for (n = m; n != NULL; n = n->m_next)
> + if (++segs > IXL_TX_PKT_DESCS)
> + goto defrag;
> + }
> +
> error = bus_dmamap_load_mbuf(dmat, map, m,
> BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
> if (error != EFBIG)
> return (error);
>
> + defrag:
> error = m_defrag(m, M_DONTWAIT);
> if (error != 0)
> return (error);
> @@ -2885,7 +2896,7 @@ ixl_start(struct ifqueue *ifq)
>
> for (;;) {
> /* We need one extra descriptor for TSO packets. */
> - if (free <= (IXL_TX_PKT_DESCS + 1)) {
> + if (free <= (IXL_TX_TSO_PKT_DESCS + 1)) {
> ifq_set_oactive(ifq);
> break;
> }
>
>