Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: ether extract headers alingment memcpy
To:
tech@openbsd.org
Date:
Tue, 13 Feb 2024 01:23:14 +0100

Download raw body.

Thread
On Wed, Feb 07, 2024 at 01:07:15AM +0100, Alexander Bluhm wrote:
> This is my next aproach to retract IP and TCP header information
> from an mbuf in driver level.  Idea is to use memcpy() to move data
> from the unaligned header bit fields to integers.
> 
> I create a union of bytes and bits on the stack.  memcpy() gets the
> byte from the mbuf, compiler cares about alingment on the stack.
> A lot of code to trick gcc into generating byte access on sparc64.
> 
> The IP and TCP header fields are stored in struct ether_extracted
> so the drivers can just use them without memcpy() magic.
> 
> I also added more length checks.  Do clear pointers to headers if
> they have bgous length information.  On top I also track packet and
> payload length and also check them for consistency.  I think this
> is better than putting stupid values into driver registers based
> on evil packets.
> 
> To make driver debugging easier, I added a lot of debug prints if
> packets look strange.
> 
> Basic testing seems to work, but I will test on more network hardware
> and architectures.  Diff is not ready for commit.
> 
> Is the idea with memcpy() and union of bit fields a good one?
> Are there arguments against more consistency checks at driver level?
> Do we want all the debug prints?
> Should we keep the m_pkthdr.len checks and track the payload?

With this diff, all access to IP and TCP header is done with sparc64
ldub instructions.

  1133                  memcpy(&hdrcpy.hc_data, ext->ip4, 1);
  1134                  hlen = hdrcpy.hc_ip.hl << 2;
  1135                  if (m->m_len - hoff < hlen) {

/usr/src/sys/net/if_ethersubr.c:1133
     490:       c2 08 80 03     ldub  [ %g2 + %g3 ], %g1
     494:       c2 2f a7 df     stb  %g1, [ %fp + 0x7df ]
/usr/src/sys/net/if_ethersubr.c:1134
     498:       82 08 60 ff     and  %g1, 0xff, %g1
/usr/src/sys/net/if_ethersubr.c:1135
     49c:       c4 02 20 18     ld  [ %o0 + 0x18 ], %g2
/usr/src/sys/net/if_ethersubr.c:1134
     4a0:       82 08 60 0f     and  %g1, 0xf, %g1
     4a4:       83 28 60 02     sll  %g1, 2, %g1
     4a8:       87 38 60 00     sra  %g1, 0, %g3
/usr/src/sys/net/if_ethersubr.c:1135

  1187                  memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
  1188                  hlen = hdrcpy.hc_th.off << 2;
  1189                  if (m->m_len - hoff < hlen) {

/usr/src/sys/net/if_ethersubr.c:1187
     5f4:       c4 08 60 0c     ldub  [ %g1 + 0xc ], %g2
     5f8:       c4 2f a7 df     stb  %g2, [ %fp + 0x7df ]
/usr/src/sys/net/if_ethersubr.c:1188
     5fc:       c2 5f a7 df     ldx  [ %fp + 0x7df ], %g1
/usr/src/sys/net/if_ethersubr.c:1189
     600:       c6 02 20 18     ld  [ %o0 + 0x18 ], %g3
/usr/src/sys/net/if_ethersubr.c:1188
     604:       83 30 70 3c     srlx  %g1, 0x3c, %g1
     608:       83 28 60 02     sll  %g1, 2, %g1
/usr/src/sys/net/if_ethersubr.c:1189
     60c:       86 20 c0 04     sub  %g3, %g4, %g3
/usr/src/sys/net/if_ethersubr.c:1188
     610:       85 38 60 00     sra  %g1, 0, %g2
/usr/src/sys/net/if_ethersubr.c:1189

I have tested it on sparc64 with em, ix, ixl.

ok?

bluhm

Index: dev/pci/if_bnxt.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_bnxt.c,v
diff -u -p -r1.45 if_bnxt.c
--- dev/pci/if_bnxt.c	19 Jan 2024 03:25:13 -0000	1.45
+++ dev/pci/if_bnxt.c	11 Feb 2024 19:52:37 -0000
@@ -1433,13 +1433,13 @@ bnxt_start(struct ifqueue *ifq)
 				lflags |= TX_BD_LONG_LFLAGS_LSO;
 				hdrsize = sizeof(*ext.eh);
 				if (ext.ip4)
-					hdrsize += ext.ip4->ip_hl << 2;
+					hdrsize += ext.ip4hlen;
 				else if (ext.ip6)
 					hdrsize += sizeof(*ext.ip6);
 				else
 					tcpstat_inc(tcps_outbadtso);
 
-				hdrsize += ext.tcp->th_off << 2;
+				hdrsize += ext.tcphlen;
 				txhi->hdr_size = htole16(hdrsize / 2);
 
 				outlen = m->m_pkthdr.ph_mss;
Index: dev/pci/if_em.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -r1.371 if_em.c
--- dev/pci/if_em.c	28 Jan 2024 18:42:58 -0000	1.371
+++ dev/pci/if_em.c	11 Feb 2024 19:52:37 -0000
@@ -2433,7 +2433,7 @@ em_tx_ctx_setup(struct em_queue *que, st
 	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_igc.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v
diff -u -p -r1.15 if_igc.c
--- dev/pci/if_igc.c	23 Jan 2024 08:48:12 -0000	1.15
+++ dev/pci/if_igc.c	11 Feb 2024 19:52:37 -0000
@@ -2028,7 +2028,7 @@ igc_tx_ctx_setup(struct tx_ring *txr, st
 	ether_extract_headers(mp, &ext);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_ix.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.206 if_ix.c
--- dev/pci/if_ix.c	10 Nov 2023 15:51:20 -0000	1.206
+++ dev/pci/if_ix.c	11 Feb 2024 19:52:37 -0000
@@ -2502,7 +2502,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
 	*vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
 			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2542,7 +2542,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
 		if (ext.tcp) {
 			uint32_t hdrlen, thlen, paylen, outlen;
 
-			thlen = ext.tcp->th_off << 2;
+			thlen = ext.tcphlen;
 
 			outlen = mp->m_pkthdr.ph_mss;
 			*mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT;
@@ -3277,11 +3277,11 @@ ixgbe_rxeof(struct rx_ring *rxr)
 					hdrlen += ETHER_VLAN_ENCAP_LEN;
 #endif
 				if (ext.ip4)
-					hdrlen += ext.ip4->ip_hl << 2;
+					hdrlen += ext.ip4hlen;
 				if (ext.ip6)
 					hdrlen += sizeof(*ext.ip6);
 				if (ext.tcp) {
-					hdrlen += ext.tcp->th_off << 2;
+					hdrlen += ext.tcphlen;
 					tcpstat_inc(tcps_inhwlro);
 					tcpstat_add(tcps_inpktlro, pkts);
 				} else {
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.95 if_ixl.c
--- dev/pci/if_ixl.c	7 Jan 2024 21:01:45 -0000	1.95
+++ dev/pci/if_ixl.c	11 Feb 2024 19:52:37 -0000
@@ -2827,7 +2827,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 		    IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
 		    IXL_TX_DESC_CMD_IIPT_IPV4;
  
-		hlen = ext.ip4->ip_hl << 2;
+		hlen = ext.ip4hlen;
 #ifdef INET6
 	} else if (ext.ip6) {
 		offload |= IXL_TX_DESC_CMD_IIPT_IPV6;
@@ -2844,10 +2844,12 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 
 	if (ext.tcp && ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
 		offload |= IXL_TX_DESC_CMD_L4T_EOFT_TCP;
-		offload |= (uint64_t)ext.tcp->th_off << IXL_TX_DESC_L4LEN_SHIFT;
+		offload |= (uint64_t)(ext.tcphlen >> 2)
+		    << IXL_TX_DESC_L4LEN_SHIFT;
 	} else if (ext.udp && ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
 		offload |= IXL_TX_DESC_CMD_L4T_EOFT_UDP;
-		offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
+		offload |= (uint64_t)(sizeof(*ext.udp) >> 2)
+		    << IXL_TX_DESC_L4LEN_SHIFT;
 	}
 
 	if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
@@ -2855,7 +2857,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 			struct ixl_tx_desc *ring, *txd;
 			uint64_t cmd = 0, paylen, outlen;
 
-			hlen += ext.tcp->th_off << 2;
+			hlen += ext.tcphlen;
 
 			outlen = m0->m_pkthdr.ph_mss;
 			paylen = m0->m_pkthdr.len - ETHER_HDR_LEN - hlen;
Index: dev/pv/if_vio.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.29 if_vio.c
--- dev/pv/if_vio.c	20 Dec 2023 09:51:06 -0000	1.29
+++ dev/pv/if_vio.c	11 Feb 2024 19:52:37 -0000
@@ -765,7 +765,7 @@ again:
 				hdr->csum_offset = offsetof(struct udphdr, uh_sum);
 
 			if (ext.ip4)
-				hdr->csum_start += ext.ip4->ip_hl << 2;
+				hdr->csum_start += ext.ip4hlen;
 #ifdef INET6
 			else if (ext.ip6)
 				hdr->csum_start += sizeof(*ext.ip6);
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
diff -u -p -r1.291 if_ethersubr.c
--- net/if_ethersubr.c	27 Jul 2023 20:21:25 -0000	1.291
+++ net/if_ethersubr.c	11 Feb 2024 19:52:37 -0000
@@ -140,6 +140,20 @@ didn't get a copy, you may request one f
 #include <netmpls/mpls.h>
 #endif /* MPLS */
 
+/* #define ETHERDEBUG 1 */
+#ifdef ETHERDEBUG
+int etherdebug = ETHERDEBUG;
+#define DNPRINTF(level, fmt, args...)					\
+	do {								\
+		if (etherdebug >= level)				\
+			printf("%s: " fmt "\n", __func__, ## args);	\
+	} while (0)
+#else
+#define DNPRINTF(level, fmt, args...)					\
+	do { } while (0)
+#endif
+#define DPRINTF(fmt, args...)	DNPRINTF(1, fmt, args)
+
 u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
     { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 u_int8_t etheranyaddr[ETHER_ADDR_LEN] =
@@ -1034,56 +1048,126 @@ ether_e64_to_addr(struct ether_addr *ea,
 
 /* Parse different TCP/IP protocol headers for a quick view inside an mbuf. */
 void
-ether_extract_headers(struct mbuf *mp, struct ether_extracted *ext)
+ether_extract_headers(struct mbuf *m0, struct ether_extracted *ext)
 {
 	struct mbuf	*m;
-	uint64_t	 hlen;
+	size_t		 hlen;
 	int		 hoff;
 	uint8_t		 ipproto;
 	uint16_t	 ether_type;
+	/* gcc 4.2.1 on sparc64 may create 32 bit loads on unaligned mbuf */
+	union {
+		u_char	hc_data;
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+		struct {
+			u_int	hl:4,	/* header length */
+				v:4;	/* version */
+		} hc_ip;
+		struct {
+			u_int	x2:4,	/* (unused) */
+				off:4;	/* data offset */
+		} hc_th;
+#endif
+#if _BYTE_ORDER == _BIG_ENDIAN
+		struct {
+			u_int	v:4,	/* version */
+				hl:4;	/* header length */
+		} hc_ip;
+		struct {
+			u_int	off:4,	/* data offset */
+				x2:4;	/* (unused) */
+		} hc_th;
+#endif
+	} hdrcpy;
 
 	/* Return NULL if header was not recognized. */
 	memset(ext, 0, sizeof(*ext));
 
-	if (mp->m_len < sizeof(*ext->eh))
-		return;
+	KASSERT(ISSET(m0->m_flags, M_PKTHDR));
+	ext->paylen = m0->m_pkthdr.len;
 
-	ext->eh = mtod(mp, struct ether_header *);
+	if (m0->m_len < sizeof(*ext->eh)) {
+		DPRINTF("m_len %d, eh %zu", m0->m_len, sizeof(*ext->eh));
+		return;
+	}
+	ext->eh = mtod(m0, struct ether_header *);
 	ether_type = ntohs(ext->eh->ether_type);
 	hlen = sizeof(*ext->eh);
+	if (ext->paylen < hlen) {
+		DPRINTF("paylen %u, ehlen %zu", ext->paylen, hlen);
+		ext->eh = NULL;
+		return;
+	}
+	ext->paylen -= hlen;
 
 #if NVLAN > 0
 	if (ether_type == ETHERTYPE_VLAN) {
-		ext->evh = mtod(mp, struct ether_vlan_header *);
+		if (m0->m_len < sizeof(*ext->evh)) {
+			DPRINTF("m_len %d, evh %zu",
+			    m0->m_len, sizeof(*ext->evh));
+			return;
+		}
+		ext->evh = mtod(m0, struct ether_vlan_header *);
 		ether_type = ntohs(ext->evh->evl_proto);
 		hlen = sizeof(*ext->evh);
+		if (sizeof(*ext->eh) + ext->paylen < hlen) {
+			DPRINTF("paylen %zu, evhlen %zu",
+			    sizeof(*ext->eh) + ext->paylen, hlen);
+			ext->evh = NULL;
+			return;
+		}
+		ext->paylen = sizeof(*ext->eh) + ext->paylen - hlen;
 	}
 #endif
 
 	switch (ether_type) {
 	case ETHERTYPE_IP:
-		m = m_getptr(mp, hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4))
+		m = m_getptr(m0, hlen, &hoff);
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4)) {
+			DPRINTF("m_len %d, hoff %d, ip4 %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->ip4));
 			return;
+		}
 		ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff);
 
-		if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+		memcpy(&hdrcpy.hc_data, ext->ip4, 1);
+		hlen = hdrcpy.hc_ip.hl << 2;
+		if (m->m_len - hoff < hlen) {
+			DPRINTF("m_len %d, hoff %d, iphl %zu",
+			    m ? m->m_len : -1, hoff, hlen);
+			ext->ip4 = NULL;
 			return;
-
-		hlen = ext->ip4->ip_hl << 2;
+		}
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, ip4hlen %zu", ext->paylen, hlen);
+			ext->ip4 = NULL;
+			return;
+		}
+		ext->ip4hlen = hlen;
+		ext->paylen -= hlen;
 		ipproto = ext->ip4->ip_p;
 
+		if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+			return;
 		break;
 #ifdef INET6
 	case ETHERTYPE_IPV6:
-		m = m_getptr(mp, hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6))
+		m = m_getptr(m0, hlen, &hoff);
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6)) {
+			DPRINTF("m_len %d, hoff %d, ip6 %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->ip6));
 			return;
+		}
 		ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
 
 		hlen = sizeof(*ext->ip6);
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, ip6hlen %zu", ext->paylen, hlen);
+			ext->ip6 = NULL;
+			return;
+		}
+		ext->paylen -= hlen;
 		ipproto = ext->ip6->ip6_nxt;
-
 		break;
 #endif
 	default:
@@ -1093,16 +1177,51 @@ ether_extract_headers(struct mbuf *mp, s
 	switch (ipproto) {
 	case IPPROTO_TCP:
 		m = m_getptr(m, hoff + hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp))
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp)) {
+			DPRINTF("m_len %d, hoff %d, tcp %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
 			return;
+		}
 		ext->tcp = (struct tcphdr *)(mtod(m, caddr_t) + hoff);
+
+		memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
+		hlen = hdrcpy.hc_th.off << 2;
+		if (m->m_len - hoff < hlen) {
+			DPRINTF("m_len %d, hoff %d, thoff %zu",
+			    m ? m->m_len : -1, hoff, hlen);
+			ext->tcp = NULL;
+			return;
+		}
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, tcphlen %zu", ext->paylen, hlen);
+			ext->tcp = NULL;
+			return;
+		}
+		ext->tcphlen = hlen;
+		ext->paylen -= hlen;
 		break;
 
 	case IPPROTO_UDP:
 		m = m_getptr(m, hoff + hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->udp))
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->udp)) {
+			DPRINTF("m_len %d, hoff %d, tcp %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
 			return;
+		}
 		ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff);
+
+		hlen = sizeof(*ext->udp);
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, udphlen %zu", ext->paylen, hlen);
+			ext->udp = NULL;
+			return;
+		}
 		break;
 	}
+
+	DNPRINTF(2, "%s%s%s%s%s%s ip4h %u, tcph %u, payl %u",
+	    ext->eh ? "eh," : "", ext->evh ? "evh," : "",
+	    ext->ip4 ? "ip4," : "", ext->ip6 ? "ip6," : "",
+	    ext->tcp ? "tcp," : "", ext->udp ? "udp," : "",
+	    ext->ip4hlen, ext->tcphlen, ext->paylen);
 }
Index: netinet/if_ether.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.h,v
diff -u -p -r1.90 if_ether.h
--- netinet/if_ether.h	27 Jul 2023 20:21:25 -0000	1.90
+++ netinet/if_ether.h	11 Feb 2024 19:52:37 -0000
@@ -307,6 +307,9 @@ struct ether_extracted {
 	struct ip6_hdr			*ip6;
 	struct tcphdr			*tcp;
 	struct udphdr			*udp;
+	u_int				 ip4hlen;
+	u_int				 tcphlen;
+	u_int				 paylen;
 };
 
 void ether_extract_headers(struct mbuf *, struct ether_extracted *);