Download raw body.
ether extract headers alingment memcpy
On Wed, Feb 07, 2024 at 01:07:15AM +0100, Alexander Bluhm wrote:
> This is my next aproach to retract IP and TCP header information
> from an mbuf in driver level. Idea is to use memcpy() to move data
> from the unaligned header bit fields to integers.
>
> I create a union of bytes and bits on the stack. memcpy() gets the
> byte from the mbuf, compiler cares about alingment on the stack.
> A lot of code to trick gcc into generating byte access on sparc64.
>
> The IP and TCP header fields are stored in struct ether_extracted
> so the drivers can just use them without memcpy() magic.
>
> I also added more length checks. Do clear pointers to headers if
> they have bgous length information. On top I also track packet and
> payload length and also check them for consistency. I think this
> is better than putting stupid values into driver registers based
> on evil packets.
>
> To make driver debugging easier, I added a lot of debug prints if
> packets look strange.
>
> Basic testing seems to work, but I will test on more network hardware
> and architectures. Diff is not ready for commit.
>
> Is the idea with memcpy() and union of bit fields a good one?
> Are there arguments against more consistency checks at driver level?
> Do we want all the debug prints?
> Should we keep the m_pkthdr.len checks and track the payload?
With this diff, all access to IP and TCP header is done with sparc64
ldub instructions.
1133 memcpy(&hdrcpy.hc_data, ext->ip4, 1);
1134 hlen = hdrcpy.hc_ip.hl << 2;
1135 if (m->m_len - hoff < hlen) {
/usr/src/sys/net/if_ethersubr.c:1133
490: c2 08 80 03 ldub [ %g2 + %g3 ], %g1
494: c2 2f a7 df stb %g1, [ %fp + 0x7df ]
/usr/src/sys/net/if_ethersubr.c:1134
498: 82 08 60 ff and %g1, 0xff, %g1
/usr/src/sys/net/if_ethersubr.c:1135
49c: c4 02 20 18 ld [ %o0 + 0x18 ], %g2
/usr/src/sys/net/if_ethersubr.c:1134
4a0: 82 08 60 0f and %g1, 0xf, %g1
4a4: 83 28 60 02 sll %g1, 2, %g1
4a8: 87 38 60 00 sra %g1, 0, %g3
/usr/src/sys/net/if_ethersubr.c:1135
1187 memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
1188 hlen = hdrcpy.hc_th.off << 2;
1189 if (m->m_len - hoff < hlen) {
/usr/src/sys/net/if_ethersubr.c:1187
5f4: c4 08 60 0c ldub [ %g1 + 0xc ], %g2
5f8: c4 2f a7 df stb %g2, [ %fp + 0x7df ]
/usr/src/sys/net/if_ethersubr.c:1188
5fc: c2 5f a7 df ldx [ %fp + 0x7df ], %g1
/usr/src/sys/net/if_ethersubr.c:1189
600: c6 02 20 18 ld [ %o0 + 0x18 ], %g3
/usr/src/sys/net/if_ethersubr.c:1188
604: 83 30 70 3c srlx %g1, 0x3c, %g1
608: 83 28 60 02 sll %g1, 2, %g1
/usr/src/sys/net/if_ethersubr.c:1189
60c: 86 20 c0 04 sub %g3, %g4, %g3
/usr/src/sys/net/if_ethersubr.c:1188
610: 85 38 60 00 sra %g1, 0, %g2
/usr/src/sys/net/if_ethersubr.c:1189
I have tested it on sparc64 with em, ix, ixl.
ok?
bluhm
Index: dev/pci/if_bnxt.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_bnxt.c,v
diff -u -p -r1.45 if_bnxt.c
--- dev/pci/if_bnxt.c 19 Jan 2024 03:25:13 -0000 1.45
+++ dev/pci/if_bnxt.c 11 Feb 2024 19:52:37 -0000
@@ -1433,13 +1433,13 @@ bnxt_start(struct ifqueue *ifq)
lflags |= TX_BD_LONG_LFLAGS_LSO;
hdrsize = sizeof(*ext.eh);
if (ext.ip4)
- hdrsize += ext.ip4->ip_hl << 2;
+ hdrsize += ext.ip4hlen;
else if (ext.ip6)
hdrsize += sizeof(*ext.ip6);
else
tcpstat_inc(tcps_outbadtso);
- hdrsize += ext.tcp->th_off << 2;
+ hdrsize += ext.tcphlen;
txhi->hdr_size = htole16(hdrsize / 2);
outlen = m->m_pkthdr.ph_mss;
Index: dev/pci/if_em.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v
diff -u -p -r1.371 if_em.c
--- dev/pci/if_em.c 28 Jan 2024 18:42:58 -0000 1.371
+++ dev/pci/if_em.c 11 Feb 2024 19:52:37 -0000
@@ -2433,7 +2433,7 @@ em_tx_ctx_setup(struct em_queue *que, st
vlan_macip_lens |= (sizeof(*ext.eh) << E1000_ADVTXD_MACLEN_SHIFT);
if (ext.ip4) {
- iphlen = ext.ip4->ip_hl << 2;
+ iphlen = ext.ip4hlen;
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_igc.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v
diff -u -p -r1.15 if_igc.c
--- dev/pci/if_igc.c 23 Jan 2024 08:48:12 -0000 1.15
+++ dev/pci/if_igc.c 11 Feb 2024 19:52:37 -0000
@@ -2028,7 +2028,7 @@ igc_tx_ctx_setup(struct tx_ring *txr, st
ether_extract_headers(mp, &ext);
if (ext.ip4) {
- iphlen = ext.ip4->ip_hl << 2;
+ iphlen = ext.ip4hlen;
type_tucmd_mlhl |= IGC_ADVTXD_TUCMD_IPV4;
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
Index: dev/pci/if_ix.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.206 if_ix.c
--- dev/pci/if_ix.c 10 Nov 2023 15:51:20 -0000 1.206
+++ dev/pci/if_ix.c 11 Feb 2024 19:52:37 -0000
@@ -2502,7 +2502,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
*vlan_macip_lens |= (ethlen << IXGBE_ADVTXD_MACLEN_SHIFT);
if (ext.ip4) {
- iphlen = ext.ip4->ip_hl << 2;
+ iphlen = ext.ip4hlen;
if (ISSET(mp->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT)) {
*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
@@ -2542,7 +2542,7 @@ ixgbe_tx_offload(struct mbuf *mp, uint32
if (ext.tcp) {
uint32_t hdrlen, thlen, paylen, outlen;
- thlen = ext.tcp->th_off << 2;
+ thlen = ext.tcphlen;
outlen = mp->m_pkthdr.ph_mss;
*mss_l4len_idx |= outlen << IXGBE_ADVTXD_MSS_SHIFT;
@@ -3277,11 +3277,11 @@ ixgbe_rxeof(struct rx_ring *rxr)
hdrlen += ETHER_VLAN_ENCAP_LEN;
#endif
if (ext.ip4)
- hdrlen += ext.ip4->ip_hl << 2;
+ hdrlen += ext.ip4hlen;
if (ext.ip6)
hdrlen += sizeof(*ext.ip6);
if (ext.tcp) {
- hdrlen += ext.tcp->th_off << 2;
+ hdrlen += ext.tcphlen;
tcpstat_inc(tcps_inhwlro);
tcpstat_add(tcps_inpktlro, pkts);
} else {
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.95 if_ixl.c
--- dev/pci/if_ixl.c 7 Jan 2024 21:01:45 -0000 1.95
+++ dev/pci/if_ixl.c 11 Feb 2024 19:52:37 -0000
@@ -2827,7 +2827,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
IXL_TX_DESC_CMD_IIPT_IPV4_CSUM :
IXL_TX_DESC_CMD_IIPT_IPV4;
- hlen = ext.ip4->ip_hl << 2;
+ hlen = ext.ip4hlen;
#ifdef INET6
} else if (ext.ip6) {
offload |= IXL_TX_DESC_CMD_IIPT_IPV6;
@@ -2844,10 +2844,12 @@ ixl_tx_setup_offload(struct mbuf *m0, st
if (ext.tcp && ISSET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
offload |= IXL_TX_DESC_CMD_L4T_EOFT_TCP;
- offload |= (uint64_t)ext.tcp->th_off << IXL_TX_DESC_L4LEN_SHIFT;
+ offload |= (uint64_t)(ext.tcphlen >> 2)
+ << IXL_TX_DESC_L4LEN_SHIFT;
} else if (ext.udp && ISSET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
offload |= IXL_TX_DESC_CMD_L4T_EOFT_UDP;
- offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
+ offload |= (uint64_t)(sizeof(*ext.udp) >> 2)
+ << IXL_TX_DESC_L4LEN_SHIFT;
}
if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
@@ -2855,7 +2857,7 @@ ixl_tx_setup_offload(struct mbuf *m0, st
struct ixl_tx_desc *ring, *txd;
uint64_t cmd = 0, paylen, outlen;
- hlen += ext.tcp->th_off << 2;
+ hlen += ext.tcphlen;
outlen = m0->m_pkthdr.ph_mss;
paylen = m0->m_pkthdr.len - ETHER_HDR_LEN - hlen;
Index: dev/pv/if_vio.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.29 if_vio.c
--- dev/pv/if_vio.c 20 Dec 2023 09:51:06 -0000 1.29
+++ dev/pv/if_vio.c 11 Feb 2024 19:52:37 -0000
@@ -765,7 +765,7 @@ again:
hdr->csum_offset = offsetof(struct udphdr, uh_sum);
if (ext.ip4)
- hdr->csum_start += ext.ip4->ip_hl << 2;
+ hdr->csum_start += ext.ip4hlen;
#ifdef INET6
else if (ext.ip6)
hdr->csum_start += sizeof(*ext.ip6);
Index: net/if_ethersubr.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
diff -u -p -r1.291 if_ethersubr.c
--- net/if_ethersubr.c 27 Jul 2023 20:21:25 -0000 1.291
+++ net/if_ethersubr.c 11 Feb 2024 19:52:37 -0000
@@ -140,6 +140,20 @@ didn't get a copy, you may request one f
#include <netmpls/mpls.h>
#endif /* MPLS */
+/* #define ETHERDEBUG 1 */
+#ifdef ETHERDEBUG
+int etherdebug = ETHERDEBUG;
+#define DNPRINTF(level, fmt, args...) \
+ do { \
+ if (etherdebug >= level) \
+ printf("%s: " fmt "\n", __func__, ## args); \
+ } while (0)
+#else
+#define DNPRINTF(level, fmt, args...) \
+ do { } while (0)
+#endif
+#define DPRINTF(fmt, args...) DNPRINTF(1, fmt, args)
+
u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
u_int8_t etheranyaddr[ETHER_ADDR_LEN] =
@@ -1034,56 +1048,126 @@ ether_e64_to_addr(struct ether_addr *ea,
/* Parse different TCP/IP protocol headers for a quick view inside an mbuf. */
void
-ether_extract_headers(struct mbuf *mp, struct ether_extracted *ext)
+ether_extract_headers(struct mbuf *m0, struct ether_extracted *ext)
{
struct mbuf *m;
- uint64_t hlen;
+ size_t hlen;
int hoff;
uint8_t ipproto;
uint16_t ether_type;
+ /* gcc 4.2.1 on sparc64 may create 32 bit loads on unaligned mbuf */
+ union {
+ u_char hc_data;
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+ struct {
+ u_int hl:4, /* header length */
+ v:4; /* version */
+ } hc_ip;
+ struct {
+ u_int x2:4, /* (unused) */
+ off:4; /* data offset */
+ } hc_th;
+#endif
+#if _BYTE_ORDER == _BIG_ENDIAN
+ struct {
+ u_int v:4, /* version */
+ hl:4; /* header length */
+ } hc_ip;
+ struct {
+ u_int off:4, /* data offset */
+ x2:4; /* (unused) */
+ } hc_th;
+#endif
+ } hdrcpy;
/* Return NULL if header was not recognized. */
memset(ext, 0, sizeof(*ext));
- if (mp->m_len < sizeof(*ext->eh))
- return;
+ KASSERT(ISSET(m0->m_flags, M_PKTHDR));
+ ext->paylen = m0->m_pkthdr.len;
- ext->eh = mtod(mp, struct ether_header *);
+ if (m0->m_len < sizeof(*ext->eh)) {
+ DPRINTF("m_len %d, eh %zu", m0->m_len, sizeof(*ext->eh));
+ return;
+ }
+ ext->eh = mtod(m0, struct ether_header *);
ether_type = ntohs(ext->eh->ether_type);
hlen = sizeof(*ext->eh);
+ if (ext->paylen < hlen) {
+ DPRINTF("paylen %u, ehlen %zu", ext->paylen, hlen);
+ ext->eh = NULL;
+ return;
+ }
+ ext->paylen -= hlen;
#if NVLAN > 0
if (ether_type == ETHERTYPE_VLAN) {
- ext->evh = mtod(mp, struct ether_vlan_header *);
+ if (m0->m_len < sizeof(*ext->evh)) {
+ DPRINTF("m_len %d, evh %zu",
+ m0->m_len, sizeof(*ext->evh));
+ return;
+ }
+ ext->evh = mtod(m0, struct ether_vlan_header *);
ether_type = ntohs(ext->evh->evl_proto);
hlen = sizeof(*ext->evh);
+ if (sizeof(*ext->eh) + ext->paylen < hlen) {
+ DPRINTF("paylen %zu, evhlen %zu",
+ sizeof(*ext->eh) + ext->paylen, hlen);
+ ext->evh = NULL;
+ return;
+ }
+ ext->paylen = sizeof(*ext->eh) + ext->paylen - hlen;
}
#endif
switch (ether_type) {
case ETHERTYPE_IP:
- m = m_getptr(mp, hlen, &hoff);
- if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4))
+ m = m_getptr(m0, hlen, &hoff);
+ if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4)) {
+ DPRINTF("m_len %d, hoff %d, ip4 %zu",
+ m ? m->m_len : -1, hoff, sizeof(*ext->ip4));
return;
+ }
ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff);
- if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+ memcpy(&hdrcpy.hc_data, ext->ip4, 1);
+ hlen = hdrcpy.hc_ip.hl << 2;
+ if (m->m_len - hoff < hlen) {
+ DPRINTF("m_len %d, hoff %d, iphl %zu",
+ m ? m->m_len : -1, hoff, hlen);
+ ext->ip4 = NULL;
return;
-
- hlen = ext->ip4->ip_hl << 2;
+ }
+ if (ext->paylen < hlen) {
+ DPRINTF("paylen %u, ip4hlen %zu", ext->paylen, hlen);
+ ext->ip4 = NULL;
+ return;
+ }
+ ext->ip4hlen = hlen;
+ ext->paylen -= hlen;
ipproto = ext->ip4->ip_p;
+ if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
+ return;
break;
#ifdef INET6
case ETHERTYPE_IPV6:
- m = m_getptr(mp, hlen, &hoff);
- if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6))
+ m = m_getptr(m0, hlen, &hoff);
+ if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6)) {
+ DPRINTF("m_len %d, hoff %d, ip6 %zu",
+ m ? m->m_len : -1, hoff, sizeof(*ext->ip6));
return;
+ }
ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
hlen = sizeof(*ext->ip6);
+ if (ext->paylen < hlen) {
+ DPRINTF("paylen %u, ip6hlen %zu", ext->paylen, hlen);
+ ext->ip6 = NULL;
+ return;
+ }
+ ext->paylen -= hlen;
ipproto = ext->ip6->ip6_nxt;
-
break;
#endif
default:
@@ -1093,16 +1177,51 @@ ether_extract_headers(struct mbuf *mp, s
switch (ipproto) {
case IPPROTO_TCP:
m = m_getptr(m, hoff + hlen, &hoff);
- if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp))
+ if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp)) {
+ DPRINTF("m_len %d, hoff %d, tcp %zu",
+ m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
return;
+ }
ext->tcp = (struct tcphdr *)(mtod(m, caddr_t) + hoff);
+
+ memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
+ hlen = hdrcpy.hc_th.off << 2;
+ if (m->m_len - hoff < hlen) {
+ DPRINTF("m_len %d, hoff %d, thoff %zu",
+ m ? m->m_len : -1, hoff, hlen);
+ ext->tcp = NULL;
+ return;
+ }
+ if (ext->paylen < hlen) {
+ DPRINTF("paylen %u, tcphlen %zu", ext->paylen, hlen);
+ ext->tcp = NULL;
+ return;
+ }
+ ext->tcphlen = hlen;
+ ext->paylen -= hlen;
break;
case IPPROTO_UDP:
m = m_getptr(m, hoff + hlen, &hoff);
- if (m == NULL || m->m_len - hoff < sizeof(*ext->udp))
+ if (m == NULL || m->m_len - hoff < sizeof(*ext->udp)) {
+ DPRINTF("m_len %d, hoff %d, tcp %zu",
+ m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
return;
+ }
ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff);
+
+ hlen = sizeof(*ext->udp);
+ if (ext->paylen < hlen) {
+ DPRINTF("paylen %u, udphlen %zu", ext->paylen, hlen);
+ ext->udp = NULL;
+ return;
+ }
break;
}
+
+ DNPRINTF(2, "%s%s%s%s%s%s ip4h %u, tcph %u, payl %u",
+ ext->eh ? "eh," : "", ext->evh ? "evh," : "",
+ ext->ip4 ? "ip4," : "", ext->ip6 ? "ip6," : "",
+ ext->tcp ? "tcp," : "", ext->udp ? "udp," : "",
+ ext->ip4hlen, ext->tcphlen, ext->paylen);
}
Index: netinet/if_ether.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.h,v
diff -u -p -r1.90 if_ether.h
--- netinet/if_ether.h 27 Jul 2023 20:21:25 -0000 1.90
+++ netinet/if_ether.h 11 Feb 2024 19:52:37 -0000
@@ -307,6 +307,9 @@ struct ether_extracted {
struct ip6_hdr *ip6;
struct tcphdr *tcp;
struct udphdr *udp;
+ u_int ip4hlen;
+ u_int tcphlen;
+ u_int paylen;
};
void ether_extract_headers(struct mbuf *, struct ether_extracted *);
ether extract headers alingment memcpy