Download raw body.
em(4) TSO support -- 2nd try
On Fri, Feb 16, 2024 at 02:16:55PM +0100, Marcus Glocker wrote:
> Yes, I missed that :-( Thanks for spotting! Updated diff.
Tested on amd64
em0 at pci2 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:da:f6:ec
em2 at pci3 dev 0 function 0 "Intel 82580" rev 0x01: msi, address 90:e2:ba:78:7c:f0
em4 at pci3 dev 0 function 2 "Intel 82580" rev 0x01: msi, address 90:e2:ba:78:7c:f2
em6 at pci4 dev 0 function 0 "Intel 82571EB" rev 0x06: apic 2 int 16, address 00:1b:21:7b:d1:10
em8 at pci5 dev 0 function 0 "Intel I350" rev 0x01: msi, address 00:25:90:e7:ec:7c
em10 at pci10 dev 0 function 0 "Intel I350" rev 0x01: msi, address a0:36:9f:04:fb:3c
em12 at pci10 dev 0 function 2 "Intel I350" rev 0x01: msi, address a0:36:9f:04:fb:3e
em14 at pci11 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:60:58:28
em16 at pci15 dev 0 function 0 "Intel 82571EB" rev 0x06: apic 3 int 13, address 00:50:c2:01:f4:60
Tested on amd64 with VLAN
em0 at pci7 dev 0 function 0 "Intel I350" rev 0x01: msi, address a0:36:9f:0a:4a:c4
em2 at pci8 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 00:1b:21:74:2d:d0
em4 at pci17 dev 0 function 0 "Intel 82575EB" rev 0x02: msi, address 00:19:99:ab:27:70
Tested on sparc64
em2 at pci7 dev 0 function 0 "Intel 82571EB" rev 0x06: ivec 0x14, address 00:21:28:13:9c:0e
em4 at pci11 dev 0 function 0 "Intel 82580" rev 0x01: msi, address 90:e2:ba:61:58:80
em8 at pci18 dev 0 function 0 "Intel 82576" rev 0x01: msi, address 90:e2:ba:d3:e4:6e
OK bluhm@
> Index: dev/pci/if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> diff -u -p -u -p -r1.373 if_em.c
> --- dev/pci/if_em.c 14 Feb 2024 22:41:48 -0000 1.373
> +++ dev/pci/if_em.c 16 Feb 2024 13:10:58 -0000
> @@ -291,6 +291,8 @@ void em_receive_checksum(struct em_softc
> struct mbuf *);
> u_int em_transmit_checksum_setup(struct em_queue *, struct mbuf *, u_int,
> u_int32_t *, u_int32_t *);
> +u_int em_tso_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
> + u_int32_t *);
> u_int em_tx_ctx_setup(struct em_queue *, struct mbuf *, u_int, u_int32_t *,
> u_int32_t *);
> void em_iff(struct em_softc *);
> @@ -1188,7 +1190,7 @@ em_flowstatus(struct em_softc *sc)
> *
> * This routine maps the mbufs to tx descriptors.
> *
> - * return 0 on success, positive on failure
> + * return 0 on failure, positive on success
> **********************************************************************/
> u_int
> em_encap(struct em_queue *que, struct mbuf *m)
> @@ -1236,7 +1238,15 @@ em_encap(struct em_queue *que, struct mb
> }
>
> if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
> - used += em_tx_ctx_setup(que, m, head, &txd_upper, &txd_lower);
> + if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
> + used += em_tso_setup(que, m, head, &txd_upper,
> + &txd_lower);
> + if (!used)
> + return (used);
> + } else {
> + used += em_tx_ctx_setup(que, m, head, &txd_upper,
> + &txd_lower);
> + }
> } else if (sc->hw.mac_type >= em_82543) {
> used += em_transmit_checksum_setup(que, m, head,
> &txd_upper, &txd_lower);
> @@ -1569,6 +1579,21 @@ em_update_link_status(struct em_softc *s
> ifp->if_link_state = link_state;
> if_link_state_change(ifp);
> }
> +
> + /* Disable TSO for 10/100 speeds to avoid some hardware issues */
> + switch (sc->link_speed) {
> + case SPEED_10:
> + case SPEED_100:
> + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
> + ifp->if_capabilities &= ~IFCAP_TSOv4;
> + ifp->if_capabilities &= ~IFCAP_TSOv6;
> + }
> + break;
> + case SPEED_1000:
> + if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210)
> + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
> + break;
> + }
> }
>
> /*********************************************************************
> @@ -1988,6 +2013,7 @@ em_setup_interface(struct em_softc *sc)
> if (sc->hw.mac_type >= em_82575 && sc->hw.mac_type <= em_i210) {
> ifp->if_capabilities |= IFCAP_CSUM_IPv4;
> ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
> }
>
> /*
> @@ -2231,9 +2257,9 @@ em_setup_transmit_structures(struct em_s
>
> for (i = 0; i < sc->sc_tx_slots; i++) {
> pkt = &que->tx.sc_tx_pkts_ring[i];
> - error = bus_dmamap_create(sc->sc_dmat, MAX_JUMBO_FRAME_SIZE,
> + error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
> EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
> - MAX_JUMBO_FRAME_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
> + EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
> if (error != 0) {
> printf("%s: Unable to create TX DMA map\n",
> DEVNAME(sc));
> @@ -2403,6 +2429,73 @@ em_free_transmit_structures(struct em_so
> 0, que->tx.sc_tx_dma.dma_map->dm_mapsize,
> BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
> }
> +}
> +
> +u_int
> +em_tso_setup(struct em_queue *que, struct mbuf *mp, u_int head,
> + u_int32_t *olinfo_status, u_int32_t *cmd_type_len)
> +{
> + struct ether_extracted ext;
> + struct e1000_adv_tx_context_desc *TD;
> + uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
> +
> + *olinfo_status = 0;
> + *cmd_type_len = 0;
> + TD = (struct e1000_adv_tx_context_desc *)&que->tx.sc_tx_desc_ring[head];
> +
> +#if NVLAN > 0
> + if (ISSET(mp->m_flags, M_VLANTAG)) {
> + uint32_t vtag = mp->m_pkthdr.ether_vtag;
> + vlan_macip_lens |= vtag << E1000_ADVTXD_VLAN_SHIFT;
> + *cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
> + }
> +#endif
> +
> + ether_extract_headers(mp, &ext);
> + if (ext.tcp == NULL)
> + goto out;
> +
> + vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
> +
> + if (ext.ip4) {
> + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
> + *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
> +#ifdef INET6
> + } else if (ext.ip6) {
> + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
> +#endif
> + } else {
> + goto out;
> + }
> +
> + *cmd_type_len |= E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS;
> + *cmd_type_len |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DCMD_TSE;
> + *olinfo_status |= ext.paylen << E1000_ADVTXD_PAYLEN_SHIFT;
> + vlan_macip_lens |= ext.iphlen;
> + type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
> +
> + type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
> + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
> +
> + mss_l4len_idx |= mp->m_pkthdr.ph_mss << E1000_ADVTXD_MSS_SHIFT;
> + mss_l4len_idx |= ext.tcphlen << E1000_ADVTXD_L4LEN_SHIFT;
> + /* 82575 needs the queue index added */
> + if (que->sc->hw.mac_type == em_82575)
> + mss_l4len_idx |= (que->me & 0xff) << 4;
> +
> + htolem32(&TD->vlan_macip_lens, vlan_macip_lens);
> + htolem32(&TD->type_tucmd_mlhl, type_tucmd_mlhl);
> + htolem32(&TD->u.seqnum_seed, 0);
> + htolem32(&TD->mss_l4len_idx, mss_l4len_idx);
> +
> + tcpstat_add(tcps_outpkttso, (ext.paylen + mp->m_pkthdr.ph_mss - 1) /
> + mp->m_pkthdr.ph_mss);
> +
> + return 1;
> +
> +out:
> + tcpstat_inc(tcps_outbadtso);
> + return 0;
> }
>
> u_int
> Index: dev/pci/if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> diff -u -p -u -p -r1.82 if_em.h
> --- dev/pci/if_em.h 28 Jan 2024 18:42:58 -0000 1.82
> +++ dev/pci/if_em.h 16 Feb 2024 13:10:58 -0000
> @@ -55,11 +55,14 @@ POSSIBILITY OF SUCH DAMAGE.
>
> #include <net/if.h>
> #include <net/if_media.h>
> +#include <net/route.h>
>
> #include <netinet/in.h>
> #include <netinet/ip.h>
> #include <netinet/if_ether.h>
> #include <netinet/tcp.h>
> +#include <netinet/tcp_timer.h>
> +#include <netinet/tcp_var.h>
> #include <netinet/udp.h>
>
> #if NBPFILTER > 0
> @@ -269,6 +272,7 @@ typedef int boolean_t;
>
> #define EM_MAX_SCATTER 64
> #define EM_TSO_SIZE 65535
> +#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
>
> struct em_packet {
> int pkt_eop; /* Index of the desc to watch */
> Index: dev/pci/if_em_hw.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em_hw.h,v
> diff -u -p -u -p -r1.92 if_em_hw.h
> --- dev/pci/if_em_hw.h 28 Jan 2024 18:42:58 -0000 1.92
> +++ dev/pci/if_em_hw.h 16 Feb 2024 13:10:59 -0000
> @@ -2150,6 +2150,7 @@ struct e1000_adv_tx_context_desc {
> #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
> #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
> #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
> +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
> #define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
>
> /* Adv Transmit Descriptor Config Masks */
> @@ -2159,6 +2160,10 @@ struct e1000_adv_tx_context_desc {
> #define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */
> #define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
> #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
> +
> +/* Req requires Markers and CRC */
> +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
> +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
>
> /* Multiple Receive Queue Control */
> #define E1000_MRQC_ENABLE_MASK 0x00000003
em(4) TSO support -- 2nd try