From: Alexander Bluhm Subject: Re: em(4) TSO support -- 2nd try To: Marcus Glocker Cc: tech@openbsd.org Date: Fri, 16 Feb 2024 22:53:31 +0100 On Fri, Feb 16, 2024 at 02:16:55PM +0100, Marcus Glocker wrote: > Yes, I missed that :-( Thanks for spotting! Updated diff. Tested on amd64 em0 at pci2 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:da:f6:ec em2 at pci3 dev 0 function 0 "Intel 82580" rev 0x01: msi, address 90:e2:ba:78:7c:f0 em4 at pci3 dev 0 function 2 "Intel 82580" rev 0x01: msi, address 90:e2:ba:78:7c:f2 em6 at pci4 dev 0 function 0 "Intel 82571EB" rev 0x06: apic 2 int 16, address 00:1b:21:7b:d1:10 em8 at pci5 dev 0 function 0 "Intel I350" rev 0x01: msi, address 00:25:90:e7:ec:7c em10 at pci10 dev 0 function 0 "Intel I350" rev 0x01: msi, address a0:36:9f:04:fb:3c em12 at pci10 dev 0 function 2 "Intel I350" rev 0x01: msi, address a0:36:9f:04:fb:3e em14 at pci11 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:60:58:28 em16 at pci15 dev 0 function 0 "Intel 82571EB" rev 0x06: apic 3 int 13, address 00:50:c2:01:f4:60 Tested on amd64 with VLan em0 at pci7 dev 0 function 0 "Intel I350" rev 0x01: msi, address a0:36:9f:0a:4a:c4 em2 at pci8 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:74:2d:d0 em4 at pci17 dev 0 function 0 "Intel 82575EB" rev 0x02: msi, address 00:19:99:ab:27:70 Tested on sparc64 em2 at pci7 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:21:28:13:9c:0e em4 at pci11 dev 0 function 0 "Intel 82580" rev 0x01: msi, address 90:e2:ba:61:58:80 em8 at pci18 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 90:e2:ba:d3:e4:6e OK bluhm@ > Index: dev/pci/if_em.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em.c,v > diff -u -p -u -p -r1.373 if_em.c > --- dev/pci/if_em.c 14 Feb 2024 22:41:48 -0000 1.373 > +++ dev/pci/if_em.c 16 Feb 2024 13:10:58 -0000 > @@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc > struct mbuf *); > 
u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int, > u_int32_t *, u_int32_t *); > +u_int em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, > + u_int32_t *); > u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *, > u_int32_t *); > void em_iff(struct em_softc *); > @@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc) > * > * This routine maps the mbufs to tx descriptors. > * > - * return 0 on success, positive on failure > + * return 0 on failure, positive on success > **********************************************************************/ > u_int > em_encap(struct em_queue *que, struct mbuf *m) > @@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb > } > > if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { > - used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower); > + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { > + used += em_tso_setup(que, m, head, &txd_upper, > + &txd_lower); > + if (!used) > + return (used); > + } else { > + used += em_tx_ctx_setup(que, m, head, &txd_upper, > + &txd_lower); > + } > } else if (sc->hw.mac_type >= em_82543) { > used += em_transmit_checksum_setup(que, m, head, > &txd_upper, &txd_lower); > @@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s > ifp->if_link_state = link_state; > if_link_state_change(ifp); > } > + > + /* Disable TSO for 10/100 speeds to avoid some hardware issues */ > + switch (sc->link_speed) { > + case SPEED_10: > + case SPEED_100: > + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { > + ifp->if_capabilities &= ~IFCAP_TSOv4; > + ifp->if_capabilities &= ~IFCAP_TSOv6; > + } > + break; > + case SPEED_1000: > + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) > + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; > + break; > + } > } > > /********************************************************************* > @@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc) > if 
(sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) { > ifp->if_capabilities |= IFCAP_CSUM_IPv4; > ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; > } > > /* > @@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s > > for (i = 0; i < sc->sc_tx_slots; i++) { > pkt = &que->tx.sc_tx_pkts_ring[i]; > - error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE, > + error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE, > EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1), > - MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); > + EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); > if (error != 0) { > printf("%s: Unable to create TX DMA map\n", > DEVNAME(sc)); > @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so > 0, que->tx.sc_tx_dma.dma_map->dm_mapsize, > BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); > } > +} > + > +u_int > +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head, > + u_int32_t *olinfo_status, u_int32_t *cmd_type_len) > +{ > + struct ether_extracted ext; > + struct e1000_adv_tx_context_desc *TD; > + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; > + > + *olinfo_status = 0; > + *cmd_type_len = 0; > + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head]; > + > +#if NVLAN > 0 > + if (ISSET(mp->m_flags, M_VLANTAG)) { > + uint32_t vtag = mp->m_pkthdr.ether_vtag; > + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT; > + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE; > + } > +#endif > + > + ether_extract_headers(mp, &ext); > + if (ext.tcp == NULL) > + goto out; > + > + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT); > + > + if (ext.ip4) { > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; > + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; > +#ifdef INET6 > + } else if (ext.ip6) { > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; > +#endif > + } else { > + goto out; > + } > + > + *cmd_type_len |= 
E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS; > + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE; > + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT; > + vlan_macip_lens |= ext.iphlen; > + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; > + > + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; > + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; > + > + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT; > + mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT; > + /* 82575 needs the queue index added */ > + if (que->sc->hw.mac_type == em_82575) > + mss_l4len_idx |= (que->me & 0xff) << 4; > + > + htolem32(&TD->vlan_macip_lens, vlan_macip_lens); > + htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl); > + htolem32(&TD->u.seqnum_seed, 0); > + htolem32(&TD->mss_l4len_idx, mss_l4len_idx); > + > + tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) / > + mp->m_pkthdr.ph_mss); > + > + return 1; > + > +out: > + tcpstat_inc(tcps_outbadtso); > + return 0; > } > > u_int > Index: dev/pci/if_em.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em.h,v > diff -u -p -u -p -r1.82 if_em.h > --- dev/pci/if_em.h 28 Jan 2024 18:42:58 -0000 1.82 > +++ dev/pci/if_em.h 16 Feb 2024 13:10:58 -0000 > @@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE. 
> > #include <net/if.h> > #include <net/if_media.h> > +#include <net/route.h> > > #include <netinet/in.h> > #include <netinet/ip.h> > #include <netinet/if_ether.h> > #include <netinet/tcp.h> > +#include <netinet/tcp_timer.h> > +#include <netinet/tcp_var.h> > #include <netinet/udp.h> > > #if NBPFILTER > 0 > @@ -269,6 +272,7 @@ typedef int boolean_t; > > #define EM_MAX_SCATTER 64 > #define EM_TSO_SIZE 65535 > +#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ > > struct em_packet { > int pkt_eop; /* Index of the desc to watch */ > Index: dev/pci/if_em_hw.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v > diff -u -p -u -p -r1.92 if_em_hw.h > --- dev/pci/if_em_hw.h 28 Jan 2024 18:42:58 -0000 1.92 > +++ dev/pci/if_em_hw.h 16 Feb 2024 13:10:59 -0000 > @@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc { > #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ > #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ > #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ > +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ > #define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ > > /* Adv Transmit Descriptor Config Masks */ > @@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc { > #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ > #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ > #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ > + > +/* Req requires Markers and CRC */ > +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ > +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ > > /* Multiple Receive Queue Control */ > #define E1000_MRQC_ENABLE_MASK 0x00000003