From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: ixl/ice(4): Avoid unnecessary defrag of TSO packets
To: Jan Klemkow <jan@openbsd.org>
Cc: tech@openbsd.org
Date: Fri, 12 Sep 2025 17:57:49 +0200

> Date: Fri, 12 Sep 2025 16:00:39 +0200
> From: Jan Klemkow <jan@openbsd.org>
> 
> Hi,
> 
> ixl(4) and ice(4) network cards can handle only up to 8 DMA segments
> for a regular packet, but up to 128 segments for a TSO packet.  TSO
> packets hit the 8-segment limit very quickly, so we run into
> unnecessary m_defrag() calls, which cost throughput.
> 
> This diff raises the limit to 128 segments for ice(4) and ixl(4) and
> adds a separate segment-count check for non-TSO packets.
> 
> ok?

I don't think so.  This bypasses all the bus_dma(9) logic.  There
isn't necessarily a relationship between the number of elements in an
mbuf chain and the number of bus_dma(9) segments: a single mbuf can
map to several segments if its data crosses page boundaries, and a
machine with an IOMMU may coalesce adjacent mbufs into one segment.

I think you should use separate bus_dma(9) maps for TSO.
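
Something along these lines, sketched for ixl(4).  Untested, and the
txm_tso_map member is an assumed addition to struct ixl_tx_map; the
constants are the ones from your diff, everything else follows the
existing code:

	struct ixl_tx_map {
		struct mbuf	*txm_m;
		bus_dmamap_t	 txm_map;	/* IXL_TX_PKT_DESCS segments */
		bus_dmamap_t	 txm_tso_map;	/* IXL_TX_TSO_PKT_DESCS segments */
		unsigned int	 txm_eop;
	};

	/* In ixl_txr_alloc(), create a second, TSO-only map per slot. */
	if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
	    IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
	    &txm->txm_tso_map) != 0)
		goto uncreate;

	/* In ixl_start(), pick the map that matches the packet type. */
	map = ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) ?
	    txm->txm_tso_map : txm->txm_map;
	error = ixl_load_mbuf(sc->sc_dmat, map, m);

That way bus_dmamap_load_mbuf() itself enforces the right segment
limit for each packet type, and the EFBIG/m_defrag() fallback keeps
working unchanged.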

> Index: dev/pci/if_ice.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ice.c,v
> diff -u -p -r1.58 if_ice.c
> --- dev/pci/if_ice.c	19 Aug 2025 11:46:52 -0000	1.58
> +++ dev/pci/if_ice.c	12 Sep 2025 13:17:23 -0000
> @@ -13868,7 +13868,7 @@ ice_tso_detect_sparse(struct mbuf *m, st
>  		hlen -= seglen;
>  	}
>  
> -	maxsegs = ICE_MAX_TX_SEGS - hdrs;
> +	maxsegs = ICE_MAX_TSO_SEGS - hdrs;
>  
>  	/* We must count the headers, in order to verify that they take up
>  	 * 3 or fewer descriptors. However, we don't need to check the data
> @@ -13957,13 +13957,23 @@ ice_tx_setup_offload(struct mbuf *m0, st
>  static inline int
>  ice_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
>  {
> +	struct mbuf *n;
> +	int segs = 0;
>  	int error;
>  
> +	/* Non-TSO packets are limited to at most ICE_MAX_TX_SEGS segments. */
> +	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
> +		for (n = m; n != NULL; n = n->m_next)
> +			if (++segs > ICE_MAX_TX_SEGS)
> +				goto defrag;
> +	}
> +
>  	error = bus_dmamap_load_mbuf(dmat, map, m,
>  	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
>  	if (error != EFBIG)
>  		return (error);
>  
> + defrag:
>  	error = m_defrag(m, M_DONTWAIT);
>  	if (error != 0)
>  		return (error);
> @@ -29671,7 +29681,7 @@ ice_tx_queues_alloc(struct ice_softc *sc
>  		for (j = 0; j < sc->isc_ntxd[i]; j++) {
>  			map = &txq->tx_map[j];
>  			if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES,
> -			    ICE_MAX_TX_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
> +			    ICE_MAX_TSO_SEGS, ICE_MAX_DMA_SEG_SIZE, 0,
>  			    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
>  			    &map->txm_map) != 0) {
>  				printf("%s: could not allocate Tx DMA map\n",
> Index: dev/pci/if_ixl.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
> diff -u -p -r1.108 if_ixl.c
> --- dev/pci/if_ixl.c	24 Jun 2025 11:03:10 -0000	1.108
> +++ dev/pci/if_ixl.c	12 Sep 2025 13:17:23 -0000
> @@ -900,6 +900,7 @@ struct ixl_rx_wb_desc_32 {
>  } __packed __aligned(16);
>  
>  #define IXL_TX_PKT_DESCS		8
> +#define IXL_TX_TSO_PKT_DESCS		128
>  #define IXL_TX_QUEUE_ALIGN		128
>  #define IXL_RX_QUEUE_ALIGN		128
>  
> @@ -2579,7 +2580,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
>  		txm = &maps[i];
>  
>  		if (bus_dmamap_create(sc->sc_dmat,
> -		    MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
> +		    MAXMCLBYTES, IXL_TX_TSO_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
>  		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
>  		    &txm->txm_map) != 0)
>  			goto uncreate;
> @@ -2749,13 +2750,23 @@ ixl_txr_free(struct ixl_softc *sc, struc
>  static inline int
>  ixl_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
>  {
> +	struct mbuf *n;
> +	int segs = 0;
>  	int error;
>  
> +	/* Non-TSO packets are limited to at most IXL_TX_PKT_DESCS segments. */
> +	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
> +		for (n = m; n != NULL; n = n->m_next)
> +			if (++segs > IXL_TX_PKT_DESCS)
> +				goto defrag;
> +	}
> +
>  	error = bus_dmamap_load_mbuf(dmat, map, m,
>  	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
>  	if (error != EFBIG)
>  		return (error);
>  
> + defrag:
>  	error = m_defrag(m, M_DONTWAIT);
>  	if (error != 0)
>  		return (error);
> @@ -2885,7 +2896,7 @@ ixl_start(struct ifqueue *ifq)
>  
>  	for (;;) {
>  		/* We need one extra descriptor for TSO packets. */
> -		if (free <= (IXL_TX_PKT_DESCS + 1)) {
> +		if (free <= (IXL_TX_TSO_PKT_DESCS + 1)) {
>  			ifq_set_oactive(ifq);
>  			break;
>  		}
> 
>