Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
ether extract headers alingment memcpy
To:
tech@openbsd.org
Date:
Wed, 7 Feb 2024 01:07:15 +0100

Download raw body.

Thread
Hi,

This is my next aproach to retract IP and TCP header information
from an mbuf in driver level.  Idea is to use memcpy() to move data
from the unaligned header bit fields to integers.

I create a union of bytes and bits on the stack.  memcpy() gets the
byte from the mbuf, compiler cares about alingment on the stack.
A lot of code to trick gcc into generating byte access on sparc64.

The IP and TCP header fields are stored in struct ether_extracted
so the drivers can just use them without memcpy() magic.

I also added more length checks.  Do clear pointers to headers if
they have bgous length information.  On top I also track packet and
payload length and also check them for consistency.  I think this
is better than putting stupid values into driver registers based
on evil packets.

To make driver debugging easier, I added a lot of debug prints if
packets look strange.

Basic testing seems to work, but I will test on more network hardware
and architectures.  Diff is not ready for commit.

Is the idea with memcpy() and union of bit fields a good one?
Are there arguments against more consistency checks at driver level?
Do we want all the debug prints?
Should we keep the m_pkthdr.len checks and track the payload?

bluhm

Index: dev/pci/if_bnxt.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_bnxt.c,v
diff -u -p -r1.45 if_bnxt.c
--- dev/pci/if_bnxt.c	19 Jan 2024 03:25:13 -0000	1.45
+++ dev/pci/if_bnxt.c	6 Feb 2024 22:20:15 -0000
@@ -1433,13 +1433,13 @@ bnxt_start(struct ifqueue *ifq)
 				lflags |= TX_BD_LONG_LFLAGS_LSO;
 				hdrsize = sizeof(*ext.eh);
 				if (ext.ip4)
-					hdrsize += ext.ip4->ip_hl << 2;
+					hdrsize += ext.ip4hlen;
 				else if (ext.ip6)
 					hdrsize += sizeof(*ext.ip6);
 				else
 					tcpstat_inc(tcps_outbadtso);
 
-				hdrsize += ext.tcp->th_off << 2;
+				hdrsize += ext.tcphlen;
 				txhi->hdr_size = htole16(hdrsize / 2);
 
 				outlen = m->m_pkthdr.ph_mss;
Index: dev/pci/if_em.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -r1.371 if_em.c
--- dev/pci/if_em.c	28 Jan 2024 18:42:58 -0000	1.371
+++ dev/pci/if_em.c	6 Feb 2024 22:20:15 -0000
@@ -2433,7 +2433,7 @@ em_tx_ctx_setup(struct em_queue *que, st
 	vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_igc.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v
diff -u -p -r1.15 if_igc.c
--- dev/pci/if_igc.c	23 Jan 2024 08:48:12 -0000	1.15
+++ dev/pci/if_igc.c	6 Feb 2024 22:20:15 -0000
@@ -2028,7 +2028,7 @@ igc_tx_ctx_setup(struct tx_ring *txr, st
 	ether_extract_headers(mp, &ext);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_ix.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.206 if_ix.c
--- dev/pci/if_ix.c	10 Nov 2023 15:51:20 -0000	1.206
+++ dev/pci/if_ix.c	6 Feb 2024 22:20:15 -0000
@@ -2502,7 +2502,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
 	*vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
 
 	if (ext.ip4) {
-		iphlen = ext.ip4->ip_hl << 2;
+		iphlen = ext.ip4hlen;
 
 		if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
 			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2542,7 +2542,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
 		if (ext.tcp) {
 			uint32_t hdrlen, thlen, paylen, outlen;
 
-			thlen = ext.tcp->th_off << 2;
+			thlen = ext.tcphlen;
 
 			outlen = mp->m_pkthdr.ph_mss;
 			*mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT;
@@ -3277,11 +3277,11 @@ ixgbe_rxeof(struct rx_ring *rxr)
 					hdrlen += ETHER_VLAN_ENCAP_LEN;
 #endif
 				if (ext.ip4)
-					hdrlen += ext.ip4->ip_hl << 2;
+					hdrlen += ext.ip4hlen;
 				if (ext.ip6)
 					hdrlen += sizeof(*ext.ip6);
 				if (ext.tcp) {
-					hdrlen += ext.tcp->th_off << 2;
+					hdrlen += ext.tcphlen;
 					tcpstat_inc(tcps_inhwlro);
 					tcpstat_add(tcps_inpktlro, pkts);
 				} else {
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.95 if_ixl.c
--- dev/pci/if_ixl.c	7 Jan 2024 21:01:45 -0000	1.95
+++ dev/pci/if_ixl.c	6 Feb 2024 22:20:15 -0000
@@ -2827,7 +2827,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 		    IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
 		    IXL_TX_DESC_CMD_IIPT_IPV4;
  
-		hlen = ext.ip4->ip_hl << 2;
+		hlen = ext.ip4hlen;
 #ifdef INET6
 	} else if (ext.ip6) {
 		offload |= IXL_TX_DESC_CMD_IIPT_IPV6;
@@ -2844,7 +2844,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 
 	if (ext.tcp && ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
 		offload |= IXL_TX_DESC_CMD_L4T_EOFT_TCP;
-		offload |= (uint64_t)ext.tcp->th_off << IXL_TX_DESC_L4LEN_SHIFT;
+		offload |= (uint64_t)ext.tcphlen << IXL_TX_DESC_L4LEN_SHIFT;
 	} else if (ext.udp && ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
 		offload |= IXL_TX_DESC_CMD_L4T_EOFT_UDP;
 		offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
@@ -2855,7 +2855,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
 			struct ixl_tx_desc *ring, *txd;
 			uint64_t cmd = 0, paylen, outlen;
 
-			hlen += ext.tcp->th_off << 2;
+			hlen += ext.tcphlen;
 
 			outlen = m0->m_pkthdr.ph_mss;
 			paylen = m0->m_pkthdr.len - ETHER_HDR_LEN - hlen;
Index: dev/pv/if_vio.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.29 if_vio.c
--- dev/pv/if_vio.c	20 Dec 2023 09:51:06 -0000	1.29
+++ dev/pv/if_vio.c	6 Feb 2024 22:20:15 -0000
@@ -765,7 +765,7 @@ again:
 				hdr->csum_offset = offsetof(struct udphdr, uh_sum);
 
 			if (ext.ip4)
-				hdr->csum_start += ext.ip4->ip_hl << 2;
+				hdr->csum_start += ext.ip4hlen;
 #ifdef INET6
 			else if (ext.ip6)
 				hdr->csum_start += sizeof(*ext.ip6);
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
diff -u -p -r1.291 if_ethersubr.c
--- net/if_ethersubr.c	27 Jul 2023 20:21:25 -0000	1.291
+++ net/if_ethersubr.c	6 Feb 2024 22:20:15 -0000
@@ -140,6 +140,20 @@ didn't get a copy, you may request one f
 #include <netmpls/mpls.h>
 #endif /* MPLS */
 
+/* #define ETHERDEBUG 1 */
+#ifdef ETHERDEBUG
+int etherdebug;
+#define DNPRINTF(level, fmt, args...)					\
+	do {								\
+		if (etherdebug >= level)				\
+			printf("%s: " fmt "\n", __func__, ## args);	\
+	} while (0)
+#else
+#define DNPRINTF(level, fmt, args...)					\
+	do { } while (0)
+#endif
+#define DPRINTF(fmt, args...)	DNPRINTF(1, fmt, args)
+
 u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
     { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 u_int8_t etheranyaddr[ETHER_ADDR_LEN] =
@@ -1034,56 +1048,126 @@ ether_e64_to_addr(struct ether_addr *ea,
 
 /* Parse different TCP/IP protocol headers for a quick view inside an mbuf. */
 void
-ether_extract_headers(struct mbuf *mp, struct ether_extracted *ext)
+ether_extract_headers(struct mbuf *m0, struct ether_extracted *ext)
 {
 	struct mbuf	*m;
-	uint64_t	 hlen;
+	size_t		 hlen;
 	int		 hoff;
 	uint8_t		 ipproto;
 	uint16_t	 ether_type;
+	/* gcc 4.2.1 on sparc64 may create 32 bit loads on unaligned mbuf */
+	union {
+		u_char	hc_data;
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+		struct {
+			u_int	hl:4,	/* header length */
+				v:4;	/* version */
+		} hc_ip;
+		struct {
+			u_int	x2:4,	/* (unused) */
+				off:4;	/* data offset */
+		} hc_th;
+#endif
+#if _BYTE_ORDER == _BIG_ENDIAN
+		struct {
+			u_int	v:4,	/* version */
+				hl:4;	/* header length */
+		} hc_ip;
+		struct {
+			u_int	off:4,	/* data offset */
+				x2:4;	/* (unused) */
+		} hc_th;
+#endif
+	} hdrcpy;
 
 	/* Return NULL if header was not recognized. */
 	memset(ext, 0, sizeof(*ext));
 
-	if (mp->m_len < sizeof(*ext->eh))
-		return;
+	KASSERT(ISSET(m0->m_flags, M_PKTHDR));
+	ext->paylen = m0->m_pkthdr.len;
 
-	ext->eh = mtod(mp, struct ether_header *);
+	if (m0->m_len < sizeof(*ext->eh)) {
+		DPRINTF("m_len %d, eh %zu", m0->m_len, sizeof(*ext->eh));
+		return;
+	}
+	ext->eh = mtod(m0, struct ether_header *);
 	ether_type = ntohs(ext->eh->ether_type);
 	hlen = sizeof(*ext->eh);
+	if (ext->paylen < hlen) {
+		DPRINTF("paylen %u, ehlen %zu", ext->paylen, hlen);
+		ext->eh = NULL;
+		return;
+	}
+	ext->paylen -= hlen;
 
 #if NVLAN > 0
 	if (ether_type == ETHERTYPE_VLAN) {
-		ext->evh = mtod(mp, struct ether_vlan_header *);
+		if (m0->m_len < sizeof(*ext->evh)) {
+			DPRINTF("m_len %d, evh %zu",
+			    m0->m_len, sizeof(*ext->evh));
+			return;
+		}
+		ext->evh = mtod(m0, struct ether_vlan_header *);
 		ether_type = ntohs(ext->evh->evl_proto);
 		hlen = sizeof(*ext->evh);
+		if (sizeof(*ext->eh) + ext->paylen < hlen) {
+			DPRINTF("paylen %zu, evhlen %zu",
+			    sizeof(*ext->eh) + ext->paylen, hlen);
+			ext->evh = NULL;
+			return;
+		}
+		ext->paylen = sizeof(*ext->eh) + ext->paylen - hlen;
 	}
 #endif
 
 	switch (ether_type) {
 	case ETHERTYPE_IP:
-		m = m_getptr(mp, hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4))
+		m = m_getptr(m0, hlen, &hoff);
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4)) {
+			DPRINTF("m_len %d, hoff %d, ip4 %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->ip4));
 			return;
+		}
 		ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff);
 
-		if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+		memcpy(&hdrcpy.hc_data, ext->ip4, 1);
+		hlen = hdrcpy.hc_ip.hl << 2;
+		if (m->m_len - hoff < hlen) {
+			DPRINTF("m_len %d, hoff %d, iphl %zu",
+			    m ? m->m_len : -1, hoff, hlen);
+			ext->ip4 = NULL;
 			return;
-
-		hlen = ext->ip4->ip_hl << 2;
+		}
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, ip4hlen %zu", ext->paylen, hlen);
+			ext->ip4 = NULL;
+			return;
+		}
+		ext->ip4hlen = hlen;
+		ext->paylen -= hlen;
 		ipproto = ext->ip4->ip_p;
 
+		if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+			return;
 		break;
 #ifdef INET6
 	case ETHERTYPE_IPV6:
-		m = m_getptr(mp, hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6))
+		m = m_getptr(m0, hlen, &hoff);
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6)) {
+			DPRINTF("m_len %d, hoff %d, ip6 %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->ip6));
 			return;
+		}
 		ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
 
 		hlen = sizeof(*ext->ip6);
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, ip6hlen %zu", ext->paylen, hlen);
+			ext->ip6 = NULL;
+			return;
+		}
+		ext->paylen -= hlen;
 		ipproto = ext->ip6->ip6_nxt;
-
 		break;
 #endif
 	default:
@@ -1093,16 +1177,51 @@ ether_extract_headers(struct mbuf *mp, s
 	switch (ipproto) {
 	case IPPROTO_TCP:
 		m = m_getptr(m, hoff + hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp))
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp)) {
+			DPRINTF("m_len %d, hoff %d, tcp %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
 			return;
+		}
 		ext->tcp = (struct tcphdr *)(mtod(m, caddr_t) + hoff);
+
+		memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
+		hlen = hdrcpy.hc_th.off << 2;
+		if (m->m_len - hoff < hlen) {
+			DPRINTF("m_len %d, hoff %d, thoff %zu",
+			    m ? m->m_len : -1, hoff, hlen);
+			ext->tcp = NULL;
+			return;
+		}
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, tcphlen %zu", ext->paylen, hlen);
+			ext->tcp = NULL;
+			return;
+		}
+		ext->tcphlen = hlen;
+		ext->paylen -= hlen;
 		break;
 
 	case IPPROTO_UDP:
 		m = m_getptr(m, hoff + hlen, &hoff);
-		if (m == NULL || m->m_len - hoff < sizeof(*ext->udp))
+		if (m == NULL || m->m_len - hoff < sizeof(*ext->udp)) {
+			DPRINTF("m_len %d, hoff %d, tcp %zu",
+			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
 			return;
+		}
 		ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff);
+
+		hlen = sizeof(*ext->udp);
+		if (ext->paylen < hlen) {
+			DPRINTF("paylen %u, udphlen %zu", ext->paylen, hlen);
+			ext->udp = NULL;
+			return;
+		}
 		break;
 	}
+
+	DNPRINTF(2, "%s%s%s%s%s%s ip4h %u, tcph %u, payl %u",
+	    ext->eh ? "eh," : "", ext->evh ? "evh," : "",
+	    ext->ip4 ? "ip4," : "", ext->ip6 ? "ip6," : "",
+	    ext->tcp ? "tcp," : "", ext->udp ? "udp," : "",
+	    ext->ip4hlen, ext->tcphlen, ext->paylen);
 }
Index: netinet/if_ether.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.h,v
diff -u -p -r1.90 if_ether.h
--- netinet/if_ether.h	27 Jul 2023 20:21:25 -0000	1.90
+++ netinet/if_ether.h	6 Feb 2024 22:20:15 -0000
@@ -307,6 +307,9 @@ struct ether_extracted {
 	struct ip6_hdr			*ip6;
 	struct tcphdr			*tcp;
 	struct udphdr			*udp;
+	u_int				 ip4hlen;
+	u_int				 tcphlen;
+	u_int				 paylen;
 };
 
 void ether_extract_headers(struct mbuf *, struct ether_extracted *);