From: Marcus Glocker Subject: em(4) TSO support -- 2nd try To: tech@openbsd.org Date: Thu, 15 Feb 2024 20:07:00 +0100 Hi, For the unaligned memory access panic on sparc64, and the watchdog timeout in conjunction with ix(4)/vlan(4), fixes have been found and committed in the meantime. This is the same TSO diff for em(4) which I've already committed last time. The only adaptation is that it uses the newly introduced 'iphlen' and 'paylen' variables from ether_extract_headers(). I've tested the diff on Hrvoje's setup, from which the initial issue was reported, using the same test tool he did, without facing any watchdog timeouts. ok? Index: dev/pci/if_em.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.c,v diff -u -p -u -p -r1.373 if_em.c --- dev/pci/if_em.c 14 Feb 2024 22:41:48 -0000 1.373 +++ dev/pci/if_em.c 15 Feb 2024 18:33:35 -0000 @@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc struct mbuf *); u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, u_int32_t *); +u_int em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, + u_int32_t *); u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, u_int32_t *); void em_iff(struct em_softc *); @@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc) * * This routine maps the mbufs to tx descriptors. 
* - * return 0 on success, positive on failure + * return 0 on failure, positive on success **********************************************************************/ u_int em_encap(struct em_queue *que, struct mbuf *m) @@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb } if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { - used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower); + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + used += em_tso_setup(que, m, head, &txd_upper, + &txd_lower); + if (!used) + return (used); + } else { + used += em_tx_ctx_setup(que, m, head, &txd_upper, + &txd_lower); + } } else if (sc->hw.mac_type >= em_82543) { used += em_transmit_checksum_setup(que, m, head, &txd_upper, &txd_lower); @@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s ifp->if_link_state = link_state; if_link_state_change(ifp); } + + /* Disable TSO for 10/100 speeds to avoid some hardware issues */ + switch (sc->link_speed) { + case SPEED_10: + case SPEED_100: + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { + ifp->if_capabilities &= ~IFCAP_TSOv4; + ifp->if_capabilities &= ~IFCAP_TSOv6; + } + break; + case SPEED_1000: + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; + break; + } } /********************************************************************* @@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc) if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { ifp->if_capabilities |= IFCAP_CSUM_IPv4; ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; } /* @@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s for (i = 0; i < sc->sc_tx_slots; i++) { pkt = &que->tx.sc_tx_pkts_ring[i]; - error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE, + error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE, EM_MAX_SCATTER / (sc->pcix_82544 ? 
2 : 1), - MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); + EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); if (error != 0) { printf("%s: Unable to create TX DMA map\n", DEVNAME(sc)); @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so 0, que->tx.sc_tx_dma.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } +} + +u_int +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head, + u_int32_t *olinfo_status, u_int32_t *cmd_type_len) +{ + struct ether_extracted ext; + struct e1000_adv_tx_context_desc *TD; + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; + + *olinfo_status = 0; + *cmd_type_len = 0; + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head]; + +#if NVLAN > 0 + if (ISSET(mp->m_flags, M_VLANTAG)) { + uint32_t vtag = mp->m_pkthdr.ether_vtag; + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT; + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE; + } +#endif + + ether_extract_headers(mp, &ext); + if (ext.tcp == NULL) + goto out; + + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT); + + if (ext.ip4) { + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; +#ifdef INET6 + } else if (ext.ip6) { + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; +#endif + } else { + goto out; + } + + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE; + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT; + vlan_macip_lens |= ext.iphlen; + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT; + mss_l4len_idx |= (ext.tcp->th_off << 2) << E1000_ADVTXD_L4LEN_SHIFT; + /* 82575 needs the queue index added */ + if (que->sc->hw.mac_type == em_82575) + mss_l4len_idx |= (que->me & 0xff) << 4; + + 
htolem32(&TD->vlan_macip_lens, vlan_macip_lens); + htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl); + htolem32(&TD->u.seqnum_seed, 0); + htolem32(&TD->mss_l4len_idx, mss_l4len_idx); + + tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) / + mp->m_pkthdr.ph_mss); + + return 1; + +out: + tcpstat_inc(tcps_outbadtso); + return 0; } u_int Index: dev/pci/if_em.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em.h,v diff -u -p -u -p -r1.82 if_em.h --- dev/pci/if_em.h 28 Jan 2024 18:42:58 -0000 1.82 +++ dev/pci/if_em.h 15 Feb 2024 18:33:35 -0000 @@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include #include +#include +#include #include #if NBPFILTER > 0 @@ -269,6 +272,7 @@ typedef int boolean_t; #define EM_MAX_SCATTER 64 #define EM_TSO_SIZE 65535 +#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ struct em_packet { int pkt_eop; /* Index of the desc to watch */ Index: dev/pci/if_em_hw.h =================================================================== RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v diff -u -p -u -p -r1.92 if_em_hw.h --- dev/pci/if_em_hw.h 28 Jan 2024 18:42:58 -0000 1.92 +++ dev/pci/if_em_hw.h 15 Feb 2024 18:33:36 -0000 @@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ #define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ /* Adv Transmit Descriptor Config Masks */ @@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ + +/* Req 
requires Markers and CRC */ +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ /* Multiple Receive Queue Control */ #define E1000_MRQC_ENABLE_MASK 0x00000003