Index | Thread | Search

From:
Marcus Glocker <marcus@nazgul.ch>
Subject:
Re: em(4) TSO support -- 2nd try
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Fri, 16 Feb 2024 14:16:55 +0100

Download raw body.

Thread
On Fri, Feb 16, 2024 at 02:04:53PM +0100, Alexander Bluhm wrote:

> On Thu, Feb 15, 2024 at 08:07:00PM +0100, Marcus Glocker wrote:
> > @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so
> >  		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
> >  		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
> >  	}
> > +}
> > +
> > +u_int
> > +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
> > +    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
> > +{
> > +	struct ether_extracted ext;
> > +	struct e1000_adv_tx_context_desc *TD;
> > +	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
> > +
> > +	*olinfo_status = 0;
> > +	*cmd_type_len = 0;
> > +	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
> > +
> > +#if NVLAN > 0
> > +	if (ISSET(mp->m_flags, M_VLANTAG)) {
> > +		uint32_t vtag = mp->m_pkthdr.ether_vtag;
> > +		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
> > +		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
> > +	}
> > +#endif
> > +
> > +	ether_extract_headers(mp, &ext);
> > +	if (ext.tcp == NULL)
> > +		goto out;
> > +
> > +	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
> > +
> > +	if (ext.ip4) {
> > +		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
> > +		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
> > +#ifdef INET6
> > +	} else if (ext.ip6) {
> > +		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
> > +#endif
> > +	} else {
> > +		goto out;
> > +	}
> > +
> > +	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
> > +	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
> > +	*olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
> > +	vlan_macip_lens |= ext.iphlen;
> > +	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
> > +
> > +	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
> > +	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
> > +
> > +	mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
> > +	mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT;
> 
> Here it crashes on sparc64.  Use ext.tcphlen.

Yes, I missed that :-(  Thanks for spotting!  Updated diff.


Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -u -p -r1.373 if_em.c
--- dev/pci/if_em.c	14 Feb 2024 22:41:48 -0000	1.373
+++ dev/pci/if_em.c	16 Feb 2024 13:10:58 -0000
@@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc
 			 struct mbuf *);
 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
 	    u_int32_t *, u_int32_t *);
+u_int	em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+	    u_int32_t *);
 u_int	em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
 	    u_int32_t *);
 void em_iff(struct em_softc *);
@@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc)
  *
  *  This routine maps the mbufs to tx descriptors.
  *
- *  return 0 on success, positive on failure
+ *  return 0 on failure, positive on success
  **********************************************************************/
 u_int
 em_encap(struct em_queue *que, struct mbuf *m)
@@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb
 	}
 
 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
-		used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+		if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+			used += em_tso_setup(que, m, head, &txd_upper,
+			    &txd_lower);
+			if (!used)
+				return (used);
+		} else {
+			used += em_tx_ctx_setup(que, m, head, &txd_upper,
+			    &txd_lower);
+		}
 	} else if (sc->hw.mac_type >= em_82543) {
 		used += em_transmit_checksum_setup(que, m, head,
 		    &txd_upper, &txd_lower);
@@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s
 		ifp->if_link_state = link_state;
 		if_link_state_change(ifp);
 	}
+
+	/* Disable TSO for 10/100 speeds to avoid some hardware issues */
+	switch (sc->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+			ifp->if_capabilities &= ~IFCAP_TSOv4;
+			ifp->if_capabilities &= ~IFCAP_TSOv6;
+		}
+		break;
+	case SPEED_1000:
+		if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
+			ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+		break;
+	}
 }
 
 /*********************************************************************
@@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc)
 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
 		ifp->if_capabilities |= IFCAP_CSUM_IPv4;
 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+		ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 	}
 
 	/* 
@@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s
 
 		for (i = 0; i < sc->sc_tx_slots; i++) {
 			pkt = &que->tx.sc_tx_pkts_ring[i];
-			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
+			error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
-			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
+			    EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
 			if (error != 0) {
 				printf("%s: Unable to create TX DMA map\n",
 				    DEVNAME(sc));
@@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so
 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	}
+}
+/* Set up an advanced TX context descriptor at ring slot head for TSO. */
+u_int
+em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+	struct ether_extracted ext;
+	struct e1000_adv_tx_context_desc *TD;
+	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+
+	*olinfo_status = 0;
+	*cmd_type_len = 0;
+	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+#if NVLAN > 0
+	if (ISSET(mp->m_flags, M_VLANTAG)) {
+		uint32_t vtag = mp->m_pkthdr.ether_vtag;
+		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+	}
+#endif
+	/* TSO only applies to TCP; anything else is counted as bad TSO. */
+	ether_extract_headers(mp, &ext);
+	if (ext.tcp == NULL)
+		goto out;
+
+	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+	if (ext.ip4) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+#ifdef INET6
+	} else if (ext.ip6) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;	/* value 0 */
+#endif
+	} else {
+		goto out;
+	}
+	/* Data descriptor flags: advanced format, insert FCS, segment TCP. */
+	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
+	*olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+	vlan_macip_lens |= ext.iphlen;
+	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+	/* Use ext.tcphlen below; ext.tcp->th_off crashed on sparc64. */
+	mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
+	mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT;
+	/* 82575 needs the queue index added */
+	if (que->sc->hw.mac_type == em_82575)
+		mss_l4len_idx |= (que->me & 0xff) << 4;
+
+	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+	htolem32(&TD->u.seqnum_seed, 0);
+	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+	/* Count ceil(paylen / mss) segments; assumes ph_mss != 0 (confirm). */
+	tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) /
+	    mp->m_pkthdr.ph_mss);
+
+	return 1;	/* context descriptor written */
+
+out:
+	tcpstat_inc(tcps_outbadtso);
+	return 0;	/* nothing written to the ring */
 }
 
 u_int
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
diff -u -p -u -p -r1.82 if_em.h
--- dev/pci/if_em.h	28 Jan 2024 18:42:58 -0000	1.82
+++ dev/pci/if_em.h	16 Feb 2024 13:10:58 -0000
@@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include <net/if.h>
 #include <net/if_media.h>
+#include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 
 #if NBPFILTER > 0
@@ -269,6 +272,7 @@ typedef int	boolean_t;
 
 #define EM_MAX_SCATTER		64
 #define EM_TSO_SIZE		65535
+#define EM_TSO_SEG_SIZE		4096	/* Max dma segment size */
 
 struct em_packet {
 	int		 pkt_eop;	/* Index of the desc to watch */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
diff -u -p -u -p -r1.92 if_em_hw.h
--- dev/pci/if_em_hw.h	28 Jan 2024 18:42:58 -0000	1.92
+++ dev/pci/if_em_hw.h	16 Feb 2024 13:10:59 -0000
@@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
 #define E1000_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
 #define E1000_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
 #define E1000_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
 
 /* Adv Transmit Descriptor Config Masks */
@@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_TUCMD_IPV6		0x00000000  /* IP Packet Type: 0=IPv6 */
 #define E1000_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
 #define E1000_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
+
+/* Adv Transmit Context Descriptor field shifts */
+#define E1000_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT		16 /* Adv ctxt MSS shift */
 
 /* Multiple Receive Queue Control */
 #define E1000_MRQC_ENABLE_MASK              0x00000003