From: Marcus Glocker Subject: Re: em(4) TSO support -- 2nd try To: Alexander Bluhm Cc: tech@openbsd.org Date: Fri, 16 Feb 2024 14:16:55 +0100 On Fri, Feb 16, 2024 at 02:04:53PM +0100, Alexander Bluhm wrote: > On Thu, Feb 15, 2024 at 08:07:00PM +0100, Marcus Glocker wrote: > > @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so > > 0, que->tx.sc_tx_dma.dma_map->dm_mapsize, > > BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); > > } > > +} > > + > > +u_int > > +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head, > > + u_int32_t *olinfo_status, u_int32_t *cmd_type_len) > > +{ > > + struct ether_extracted ext; > > + struct e1000_adv_tx_context_desc *TD; > > + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; > > + > > + *olinfo_status = 0; > > + *cmd_type_len = 0; > > + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head]; > > + > > +#if NVLAN > 0 > > + if (ISSET(mp->m_flags, M_VLANTAG)) { > > + uint32_t vtag = mp->m_pkthdr.ether_vtag; > > + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT; > > + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE; > > + } > > +#endif > > + > > + ether_extract_headers(mp, &ext); > > + if (ext.tcp == NULL) > > + goto out; > > + > > + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT); > > + > > + if (ext.ip4) { > > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; > > + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; > > +#ifdef INET6 > > + } else if (ext.ip6) { > > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; > > +#endif > > + } else { > > + goto out; > > + } > > + > > + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; > > + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE; > > + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT; > > + vlan_macip_lens |= ext.iphlen; > > + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; > > + > > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; > > + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; > > + > > + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT; > > + mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT; > > Here it crashes on sparc64. Use ext.tcphlen. Yes, I missed that :-( Thanks for spotting! Updated diff. Index: dev/pci/if_em.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.c,v diff -u -p -u -p -r1.373 if_em.c --- dev/pci/if_em.c 14 Feb 2024 22:41:48 -0000 1.373 +++ dev/pci/if_em.c 16 Feb 2024 13:10:58 -0000 @@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc struct mbuf *); u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, u_int32_t *); +u_int em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, + u_int32_t *); u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, u_int32_t *); void em_iff(struct em_softc *); @@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc) * * This routine maps the mbufs to tx descriptors. * - * return 0 on success, positive on failure + * return 0 on failure, positive on success **********************************************************************/ u_int em_encap(struct em_queue *que, struct mbuf *m) @@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb } if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { - used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + used += em_tso_setup(que, m, head, &txd_upper, + &txd_lower); + if (!used) + return (used); + } else { + used += em_tx_ctx_setup(que, m, head, &txd_upper, + &txd_lower); + } } else if (sc->hw.mac_type >= em_82543) { used += em_transmit_checksum_setup(que, m, head, &txd_upper, &txd_lower); @@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s ifp->if_link_state = link_state; if_link_state_change(ifp); } + + /* Disable TSO for 10/100 speeds to avoid some hardware issues */ + switch (sc->link_speed) { + case SPEED_10: + case SPEED_100: + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { + ifp->if_capabilities &= ~IFCAP_TSOv4; + ifp->if_capabilities &= ~IFCAP_TSOv6; + } + break; + case SPEED_1000: + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; + break; + } } /********************************************************************* @@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc) if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { ifp->if_capabilities |= IFCAP_CSUM_IPv4; ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; } /* @@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s for (i = 0; i < sc->sc_tx_slots; i++) { pkt = &que->tx.sc_tx_pkts_ring[i]; - error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE, + error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE, EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1), - MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); + EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); if (error != 0) { printf("%s: Unable to create TX DMA map\n", DEVNAME(sc)); @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so 0, que->tx.sc_tx_dma.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } +} + +u_int +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head, + u_int32_t *olinfo_status, u_int32_t *cmd_type_len) +{ + struct ether_extracted ext; + struct e1000_adv_tx_context_desc *TD; + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; + + *olinfo_status = 0; + *cmd_type_len = 0; + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head]; + +#if NVLAN > 0 + if (ISSET(mp->m_flags, M_VLANTAG)) { + uint32_t vtag = mp->m_pkthdr.ether_vtag; + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT; + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + } +#endif + + ether_extract_headers(mp, &ext); + if (ext.tcp == NULL) + goto out; + + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT); + + if (ext.ip4) { + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; +#ifdef INET6 + } else if (ext.ip6) { + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; +#endif + } else { + goto out; + } + + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE; + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT; + vlan_macip_lens |= ext.iphlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT; + mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT; + /* 82575 needs the queue index added */ + if (que->sc->hw.mac_type == em_82575) + mss_l4len_idx |= (que->me & 0xff) << 4; + + htolem32(&TD->vlan_macip_lens, vlan_macip_lens); + htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl); + htolem32(&TD->u.seqnum_seed, 0); + htolem32(&TD->mss_l4len_idx, mss_l4len_idx); + + tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) / + mp->m_pkthdr.ph_mss); + + return 1; + +out: + tcpstat_inc(tcps_outbadtso); + return 0; } u_int Index: dev/pci/if_em.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.h,v diff -u -p -u -p -r1.82 if_em.h --- dev/pci/if_em.h 28 Jan 2024 18:42:58 -0000 1.82 +++ dev/pci/if_em.h 16 Feb 2024 13:10:58 -0000 @@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include #include +#include +#include #include #if NBPFILTER > 0 @@ -269,6 +272,7 @@ typedef int boolean_t; #define EM_MAX_SCATTER 64 #define EM_TSO_SIZE 65535 +#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ struct em_packet { int pkt_eop; /* Index of the desc to watch */ Index: dev/pci/if_em_hw.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v diff -u -p -u -p -r1.92 if_em_hw.h --- dev/pci/if_em_hw.h 28 Jan 2024 18:42:58 -0000 1.92 +++ dev/pci/if_em_hw.h 16 Feb 2024 13:10:59 -0000 @@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ #define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ /* Adv Transmit Descriptor Config Masks */ @@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ + +/* Req requires Markers and CRC */ +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ /* Multiple Receive Queue Control */ #define E1000_MRQC_ENABLE_MASK 0x00000003