From: Marcus Glocker <marcus@nazgul.ch>
Subject: em(4) TSO support -- 2nd try
To: tech@openbsd.org
Date: Thu, 15 Feb 2024 20:07:00 +0100

Hi,

For the unaligned memory access panic on sparc64 and the watchdog
timeout in conjunction with ix(4)/vlan(4), fixes have been found and
committed in the meantime.  This is the same TSO diff for em(4) that
I had already committed last time.  The only adaptation is that it
now uses the newly introduced 'iphlen' and 'paylen' variables from
ether_extract_headers().
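
For reference, here is a rough standalone sketch (not part of the
diff) of how those two values, together with the mbuf's ph_mss, end
up in the advanced TX context descriptor words.  The shift constants
mirror the E1000_ADVTXD_* defines added below; the numbers are made
up and it leaves out the MACLEN/VLAN bits:

/*
 * Illustrative userland sketch only; the real packing happens in
 * em_tso_setup() in the diff below.
 */
#include <inttypes.h>
#include <stdio.h>

#define ADVTXD_PAYLEN_SHIFT	14	/* E1000_ADVTXD_PAYLEN_SHIFT */
#define ADVTXD_L4LEN_SHIFT	8	/* E1000_ADVTXD_L4LEN_SHIFT */
#define ADVTXD_MSS_SHIFT	16	/* E1000_ADVTXD_MSS_SHIFT */

int
main(void)
{
	uint32_t iphlen = 20;	 /* ext.iphlen from ether_extract_headers() */
	uint32_t paylen = 16000; /* ext.paylen, TCP payload of the TSO chain */
	uint32_t th_off = 5;	 /* ext.tcp->th_off, TCP header in 32-bit words */
	uint32_t mss = 1448;	 /* mp->m_pkthdr.ph_mss */

	uint32_t vlan_macip_lens = iphlen;	/* IP header length, low bits */
	uint32_t olinfo_status = paylen << ADVTXD_PAYLEN_SHIFT;
	uint32_t mss_l4len_idx = (mss << ADVTXD_MSS_SHIFT) |
	    ((th_off << 2) << ADVTXD_L4LEN_SHIFT);

	/* same packet accounting as the tcps_outpkttso counter below */
	uint32_t segs = (paylen + mss - 1) / mss;

	printf("vlan_macip_lens=%#" PRIx32 " olinfo_status=%#" PRIx32
	    " mss_l4len_idx=%#" PRIx32 " tso_segments=%" PRIu32 "\n",
	    vlan_macip_lens, olinfo_status, mss_l4len_idx, segs);
	return 0;
}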

I've tested the diff on Hrvoje's setup, from which the initial issue
was reported, using the same test tool he did, without running into
any watchdog timeouts.

ok?


Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -u -p -r1.373 if_em.c
--- dev/pci/if_em.c	14 Feb 2024 22:41:48 -0000	1.373
+++ dev/pci/if_em.c	15 Feb 2024 18:33:35 -0000
@@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc
 			 struct mbuf *);
 u_int	em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
 	    u_int32_t *, u_int32_t *);
+u_int	em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+	    u_int32_t *);
 u_int	em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
 	    u_int32_t *);
 void em_iff(struct em_softc *);
@@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc)
  *
  *  This routine maps the mbufs to tx descriptors.
  *
- *  return 0 on success, positive on failure
+ *  return 0 on failure, positive on success
  **********************************************************************/
 u_int
 em_encap(struct em_queue *que, struct mbuf *m)
@@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb
 	}
 
 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
-		used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+		if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+			used += em_tso_setup(que, m, head, &txd_upper,
+			    &txd_lower);
+			if (!used)
+				return (used);
+		} else {
+			used += em_tx_ctx_setup(que, m, head, &txd_upper,
+			    &txd_lower);
+		}
 	} else if (sc->hw.mac_type >= em_82543) {
 		used += em_transmit_checksum_setup(que, m, head,
 		    &txd_upper, &txd_lower);
@@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s
 		ifp->if_link_state = link_state;
 		if_link_state_change(ifp);
 	}
+
+	/* Disable TSO for 10/100 speeds to avoid some hardware issues */
+	switch (sc->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+			ifp->if_capabilities &= ~IFCAP_TSOv4;
+			ifp->if_capabilities &= ~IFCAP_TSOv6;
+		}
+		break;
+	case SPEED_1000:
+		if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
+			ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+		break;
+	}
 }
 
 /*********************************************************************
@@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc)
 	if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
 		ifp->if_capabilities |= IFCAP_CSUM_IPv4;
 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+		ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 	}
 
 	/* 
@@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s
 
 		for (i = 0; i < sc->sc_tx_slots; i++) {
 			pkt = &que->tx.sc_tx_pkts_ring[i];
-			error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
+			error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
 			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
-			    MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
+			    EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
 			if (error != 0) {
 				printf("%s: Unable to create TX DMA map\n",
 				    DEVNAME(sc));
@@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so
 		    0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 	}
+}
+
+u_int
+em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+	struct ether_extracted ext;
+	struct e1000_adv_tx_context_desc *TD;
+	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+
+	*olinfo_status = 0;
+	*cmd_type_len = 0;
+	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+#if NVLAN > 0
+	if (ISSET(mp->m_flags, M_VLANTAG)) {
+		uint32_t vtag = mp->m_pkthdr.ether_vtag;
+		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+	}
+#endif
+
+	ether_extract_headers(mp, &ext);
+	if (ext.tcp == NULL)
+		goto out;
+
+	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+	if (ext.ip4) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+#ifdef INET6
+	} else if (ext.ip6) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+#endif
+	} else {
+		goto out;
+	}
+
+	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
+	*olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+	vlan_macip_lens |= ext.iphlen;
+	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+
+	mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
+	mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT;
+	/* 82575 needs the queue index added */
+	if (que->sc->hw.mac_type == em_82575)
+		mss_l4len_idx |= (que->me & 0xff) << 4;
+
+	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+	htolem32(&TD->u.seqnum_seed, 0);
+	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+
+	tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) /
+	    mp->m_pkthdr.ph_mss);
+
+	return 1;
+
+out:
+	tcpstat_inc(tcps_outbadtso);
+	return 0;
 }
 
 u_int
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
diff -u -p -u -p -r1.82 if_em.h
--- dev/pci/if_em.h	28 Jan 2024 18:42:58 -0000	1.82
+++ dev/pci/if_em.h	15 Feb 2024 18:33:35 -0000
@@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include <net/if.h>
 #include <net/if_media.h>
+#include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 
 #if NBPFILTER > 0
@@ -269,6 +272,7 @@ typedef int	boolean_t;
 
 #define EM_MAX_SCATTER		64
 #define EM_TSO_SIZE		65535
+#define EM_TSO_SEG_SIZE		4096	/* Max dma segment size */
 
 struct em_packet {
 	int		 pkt_eop;	/* Index of the desc to watch */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
diff -u -p -u -p -r1.92 if_em_hw.h
--- dev/pci/if_em_hw.h	28 Jan 2024 18:42:58 -0000	1.92
+++ dev/pci/if_em_hw.h	15 Feb 2024 18:33:36 -0000
@@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
 #define E1000_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
 #define E1000_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
 #define E1000_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
 
 /* Adv Transmit Descriptor Config Masks */
@@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc {
 #define E1000_ADVTXD_TUCMD_IPV6		0x00000000  /* IP Packet Type: 0=IPv6 */
 #define E1000_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
 #define E1000_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
+
+/* Req requires Markers and CRC */
+#define E1000_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT		16 /* Adv ctxt MSS shift */
 
 /* Multiple Receive Queue Control */
 #define E1000_MRQC_ENABLE_MASK              0x00000003