Download raw body.
em(4) TSO support -- 2nd try
On Fri, Feb 16, 2024 at 02:04:53PM +0100, Alexander Bluhm wrote:
> On Thu, Feb 15, 2024 at 08:07:00PM +0100, Marcus Glocker wrote:
> > @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so
> > 0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
> > BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
> > }
> > +}
> > +
> > +u_int
> > +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
> > + u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
> > +{
> > + struct ether_extracted ext;
> > + struct e1000_adv_tx_context_desc *TD;
> > + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
> > +
> > + *olinfo_status = 0;
> > + *cmd_type_len = 0;
> > + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
> > +
> > +#if NVLAN > 0
> > + if (ISSET(mp->m_flags, M_VLANTAG)) {
> > + uint32_t vtag = mp->m_pkthdr.ether_vtag;
> > + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
> > + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
> > + }
> > +#endif
> > +
> > + ether_extract_headers(mp, &ext);
> > + if (ext.tcp == NULL)
> > + goto out;
> > +
> > + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
> > +
> > + if (ext.ip4) {
> > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
> > + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
> > +#ifdef INET6
> > + } else if (ext.ip6) {
> > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
> > +#endif
> > + } else {
> > + goto out;
> > + }
> > +
> > + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
> > + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
> > + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
> > + vlan_macip_lens |= ext.iphlen;
> > + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
> > +
> > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
> > + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
> > +
> > + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
> > + mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT;
>
> Here it crashes on sparc64. Use ext.tcphlen.
Yes, I missed that :-( Thanks for spotting! Updated diff.
Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -u -p -r1.373 if_em.c
--- dev/pci/if_em.c 14 Feb 2024 22:41:48 -0000 1.373
+++ dev/pci/if_em.c 16 Feb 2024 13:10:58 -0000
@@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc
struct mbuf *);
u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
u_int32_t *, u_int32_t *);
+u_int em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
+ u_int32_t *);
u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
u_int32_t *);
void em_iff(struct em_softc *);
@@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc)
*
* This routine maps the mbufs to tx descriptors.
*
- * return 0 on success, positive on failure
+ * return 0 on failure, positive on success
**********************************************************************/
u_int
em_encap(struct em_queue *que, struct mbuf *m)
@@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb
}
if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
- used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
+ if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ used += em_tso_setup(que, m, head, &txd_upper,
+ &txd_lower);
+ if (!used)
+ return (used);
+ } else {
+ used += em_tx_ctx_setup(que, m, head, &txd_upper,
+ &txd_lower);
+ }
} else if (sc->hw.mac_type >= em_82543) {
used += em_transmit_checksum_setup(que, m, head,
&txd_upper, &txd_lower);
@@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s
ifp->if_link_state = link_state;
if_link_state_change(ifp);
}
+
+ /* Disable TSO for 10/100 speeds to avoid some hardware issues */
+ switch (sc->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
+ ifp->if_capabilities &= ~IFCAP_TSOv4;
+ ifp->if_capabilities &= ~IFCAP_TSOv6;
+ }
+ break;
+ case SPEED_1000:
+ if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
+ ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+ break;
+ }
}
/*********************************************************************
@@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc)
if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
ifp->if_capabilities |= IFCAP_CSUM_IPv4;
ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+ ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
}
/*
@@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s
for (i = 0; i < sc->sc_tx_slots; i++) {
pkt = &que->tx.sc_tx_pkts_ring[i];
- error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
+ error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
- MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
+ EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
if (error != 0) {
printf("%s: Unable to create TX DMA map\n",
DEVNAME(sc));
@@ -2403,6 +2429,88 @@ em_free_transmit_structures(struct em_so
0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
}
+}
+
+/*
+ * Build the advanced TX context descriptor for a TCP segmentation offload
+ * (TSO) packet in ring slot `head' and hand back the olinfo_status and
+ * cmd_type_len bits computed for it.
+ *
+ * Returns 1 once the context descriptor has been written; em_encap() adds
+ * this to its count of used descriptors.  Returns 0, after bumping
+ * tcps_outbadtso and without writing the descriptor, when the packet is not
+ * TCP over IPv4/IPv6 or carries no MSS (the output words may be partly set).
+ */
+u_int
+em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
+    u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
+{
+	struct ether_extracted ext;
+	struct e1000_adv_tx_context_desc *TD;
+	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
+
+	*olinfo_status = 0;
+	*cmd_type_len = 0;
+	TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
+
+#if NVLAN > 0
+	if (ISSET(mp->m_flags, M_VLANTAG)) {
+		uint32_t vtag = mp->m_pkthdr.ether_vtag;
+		vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
+		*cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+	}
+#endif
+
+	ether_extract_headers(mp, &ext);
+	/*
+	 * A zero MSS cannot be segmented and would divide by zero in the
+	 * statistics update below, so treat it like a non-TCP packet.
+	 */
+	if (ext.tcp == NULL || mp->m_pkthdr.ph_mss == 0)
+		goto out;
+
+	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
+
+	if (ext.ip4) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
+		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+#ifdef INET6
+	} else if (ext.ip6) {
+		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
+#endif
+	} else {
+		goto out;
+	}
+
+	*cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
+	*cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
+	*olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+	vlan_macip_lens |= ext.iphlen;
+	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
+	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+
+	mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
+	mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT;
+	/* 82575 needs the queue index added */
+	if (que->sc->hw.mac_type == em_82575)
+		mss_l4len_idx |= (que->me & 0xff) << 4;
+
+	htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
+	htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
+	htolem32(&TD->u.seqnum_seed, 0);
+	htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
+
+	/* Number of segments: paylen divided by the MSS, rounded up. */
+	tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) /
+	    mp->m_pkthdr.ph_mss);
+
+	return 1;
+
+out:
+	tcpstat_inc(tcps_outbadtso);
+	return 0;
}
u_int
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
diff -u -p -u -p -r1.82 if_em.h
--- dev/pci/if_em.h 28 Jan 2024 18:42:58 -0000 1.82
+++ dev/pci/if_em.h 16 Feb 2024 13:10:58 -0000
@@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE.
#include <net/if.h>
#include <net/if_media.h>
+#include <net/route.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#if NBPFILTER > 0
@@ -269,6 +272,7 @@ typedef int boolean_t;
#define EM_MAX_SCATTER 64
#define EM_TSO_SIZE 65535
+#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
struct em_packet {
int pkt_eop; /* Index of the desc to watch */
Index: dev/pci/if_em_hw.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
diff -u -p -u -p -r1.92 if_em_hw.h
--- dev/pci/if_em_hw.h 28 Jan 2024 18:42:58 -0000 1.92
+++ dev/pci/if_em_hw.h 16 Feb 2024 13:10:59 -0000
@@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc {
#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
/* Adv Transmit Descriptor Config Masks */
@@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc {
#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
+
+/* Req requires Markers and CRC */
+#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
/* Multiple Receive Queue Control */
#define E1000_MRQC_ENABLE_MASK 0x00000003
em(4) TSO support -- 2nd try