Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: vport/veb(4): use/fix checksum offload
To:
Jan Klemkow <jan@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sun, 29 Jun 2025 19:57:15 +0200

Download raw body.

Thread
On Fri, Jun 27, 2025 at 08:06:42PM +0200, Jan Klemkow wrote:
> Hi,
> 
> This diff enables checksum offload in vport(4) and fixes checksum
> offload in veb(4) in some corner cases, namely when we get packets
> with M_TCP_CSUM_OUT set from an interface.
> 
> When we bridge two vio(4) interfaces via veb(4), we lose the
> VIRTIO_NET_HDR_F_NEEDS_CSUM flag, which is encoded in our mbuf via
> the M_TCP_CSUM_OUT flag.
> 
> In the case of bridging vio(4) with a physical interface (e.g. em(4)),
> we also have to keep the M_TCP_CSUM_OUT flag, so that the hardware is
> able to calculate the correct checksum.  Or, when we bridge vio(4)
> with an interface that is unable to calculate the checksum, we have to
> do this here, before sending the packet to the interface.
> 
> ok?

This crashes in my veb(4) test with vio(4):

panic: kernel diagnostic assertion "ISSET(m0->m_flags, M_PKTHDR)" failed: file "/usr/src/sys/net/if_ethersubr.c", line 1098
Stopped at      db_enter+0x14:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  70180  44954      0    0x100002          0    1  udpbench
db_enter() at db_enter+0x14
panic(ffffffff825de814) at panic+0xd5
__assert(ffffffff8261c88d,ffffffff825b6f99,44a,ffffffff825d464e) at __assert+0x29
ether_extract_headers(fffffd80ffff7600,ffff80002d681218) at ether_extract_headers+0x369
vio_tx_offload(fffffd800bcfe8ac,fffffd80ffff7600) at vio_tx_offload+0x3b
vio_start(ffff800000355308) at vio_start+0x270
ifq_serialize(ffff800000355308,ffff800000355420) at ifq_serialize+0x102
vio_txeof(ffff8000003558c0) at vio_txeof+0xfe
vio_tx_intr(ffff8000003558c0) at vio_tx_intr+0x42
vio_queue_intr(ffff8000000f7c00) at vio_queue_intr+0x33
intr_handler(ffff80002d6814d0,ffff800000086100) at intr_handler+0xa4
Xintr_ioapic_edge27_untramp() at Xintr_ioapic_edge27_untramp+0x18f
acpicpu_idle() at acpicpu_idle+0x2b3
sched_idle(ffffffff82a23ff0) at sched_idle+0x24b
end trace frame: 0x0, count: 1
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}> 

ddb{0}> show panic
*cpu0: kernel diagnostic assertion "ISSET(m0->m_flags, M_PKTHDR)" failed: file "/usr/src/sys/net/if_ethersubr.c", line 1098

ddb{0}> trace
db_enter() at db_enter+0x14
panic(ffffffff825de814) at panic+0xd5
__assert(ffffffff8261c88d,ffffffff825b6f99,44a,ffffffff825d464e) at __assert+0x29
ether_extract_headers(fffffd80ffff7600,ffff80002d681218) at ether_extract_headers+0x369
vio_tx_offload(fffffd800bcfe8ac,fffffd80ffff7600) at vio_tx_offload+0x3b
vio_start(ffff800000355308) at vio_start+0x270
ifq_serialize(ffff800000355308,ffff800000355420) at ifq_serialize+0x102
vio_txeof(ffff8000003558c0) at vio_txeof+0xfe
vio_tx_intr(ffff8000003558c0) at vio_tx_intr+0x42
vio_queue_intr(ffff8000000f7c00) at vio_queue_intr+0x33
intr_handler(ffff80002d6814d0,ffff800000086100) at intr_handler+0xa4
Xintr_ioapic_edge27_untramp() at Xintr_ioapic_edge27_untramp+0x18f
acpicpu_idle() at acpicpu_idle+0x2b3
sched_idle(ffffffff82a23ff0) at sched_idle+0x24b
end trace frame: 0x0, count: -14

ddb{0}> show register
rdi                              0x9
rsi                             0x10
rbp               0xffff80002d681110
rbx               0xffff800000355000
rdx               0xc000000000000000
rcx                            0x282
rax                             0x7c
r8                 0x101010101010101
r9                0x8080808080808080
r10               0xf95d69ffd233c548
r11               0x94022472144329bb
r12               0xffff8000000f7c00
r13               0xfffffd80ffff7600
r14                                0
r15               0xffffffff82a24bb0    cpu_info_full_primary+0x2bb0
rip               0xffffffff81cfda14    db_enter+0x14
cs                               0x8
rflags                         0x286
rsp               0xffff80002d681110
ss                              0x10
db_enter+0x14:  popq    %rbp

ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 44954   70180  68981      0  7    0x100002                udpbench
 18730  213544  68981      0  3    0x10008a  kqread        ssh
 68981  463341  64032      0  3        0x82  kqread        perl
 59672  434786  51963   1000  3    0x100082  kqread        tcpbench
 51963  288000  53359   1000  3    0x10008a  sigsusp       sh
 53359  223205      1      0  3    0x100088  sigsusp       ksh
 64032  211027  71690      0  3        0x82  piperd        perl
 71690  462470  87683      0  3    0x10008a  sigsusp       ksh
 87683  480153  28301      0  3        0x98  kqread        sshd-session
 28301  140575   8797      0  3        0x92  kqread        sshd-session
 85747  212758      1      0  3    0x100083  ttyin         getty
 85603  211656      1      0  3    0x100098  kqread        cron
  5783  195521      1     99  3   0x1100090  kqread        sndiod
 98573   44834      1    110  3    0x100090  kqread        sndiod
   623  432699  31607     95  3   0x1100092  kqread        smtpd
 18936  371764  31607    103  3   0x1100092  kqread        smtpd
 24690  135323  31607     95  3   0x1100092  kqread        smtpd
 94456  126137  31607     95  3    0x100092  kqread        smtpd
 36742  382638  31607     95  3   0x1100092  kqread        smtpd
 45105  476298  31607     95  3   0x1100092  kqread        smtpd
 31607  379511      1      0  3    0x100080  kqread        smtpd
 56878  100046  84185     91  3        0x92  kqread        snmpd_metrics
  4101  476944  84185     91  3   0x1100092  kqread        snmpd
 84185  244022      1      0  3    0x100080  kqread        snmpd
  8797   73082      1      0  3        0x88  kqread        sshd
 27401  139496      0      0  3     0x14200  acct          acct
 99803  180292      0      0  3     0x14280  nfsidl        nfsio
   926  257162      0      0  3     0x14280  nfsidl        nfsio
 88484  142040      0      0  3     0x14280  nfsidl        nfsio
 96639  204245      0      0  3     0x14280  nfsidl        nfsio
  3001   55396      1      0  3    0x100080  kqread        ntpd
 81549  428444  60028     83  3    0x100092  kqread        ntpd
 60028  222797      1     83  3   0x1100092  kqread        ntpd
 30403   16656  22568     74  3   0x1100092  bpf           pflogd
 22568  176329      1      0  3        0x80  sbwait        pflogd
 75162  292851  71649     73  3   0x1100090  kqread        syslogd
 71649  377859      1      0  3    0x100082  sbwait        syslogd
 58958  395207  80793     77  3    0x100092  kqread        dhcpleased
 53741  368806  80793     77  3    0x100092  kqread        dhcpleased
 80793  157232      1      0  3        0x80  kqread        dhcpleased
 74531  514812  60937    115  3    0x100092  kqread        slaacd
 12967  210884  60937    115  3    0x100092  kqread        slaacd
 60937  518830      1      0  3    0x100080  kqread        slaacd
 30661  215051      0      0  3     0x14200  bored         smr
 75468  322573      0      0  3     0x14200  pgzero        zerothread
 41240  355155      0      0  3     0x14200  aiodoned      aiodoned
 91424  416335      0      0  3     0x14200  syncer        update
 80407  463168      0      0  3     0x14200  cleaner       cleaner
  7592  286484      0      0  3     0x14200  reaper        reaper
 35423   62567      0      0  3     0x14200  pgdaemon      pagedaemon
 55853  514324      0      0  3     0x14200  bored         viomb
 39824  222834      0      0  3     0x14200  usbtsk        usbtask
 20917  148211      0      0  3     0x14200  usbatsk       usbatsk
 19096  139930      0      0  3  0x40014200  acpi0         acpi0
 53381  364941      0      0  7  0x40014200                idle3
 21425  415595      0      0  7  0x40014200                idle2
 77843  276372      0      0  3  0x40014200                idle1
 82580  115805      0      0  3     0x14200  bored         softnet3
 20516  152121      0      0  3     0x14200  bored         softnet2
 62562   99051      0      0  3     0x14200  bored         softnet1
 60404  128064      0      0  3     0x14200  bored         softnet0
 77233  238200      0      0  3     0x14200  bored         systqmp
 93135  458072      0      0  3     0x14200  bored         systq
 94638  124989      0      0  3     0x14200  tmoslp        softclockmp
 94800   65026      0      0  3  0x40014200  tmoslp        softclock
*64558  189310      0      0  7  0x40014200                idle0
     1  300851      0      0  3        0x82  wait          init
     0       0     -1      0  3  0x10010200  scheduler     swapper

> Index: net/if_veb.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_veb.c,v
> diff -u -p -r1.38 if_veb.c
> --- net/if_veb.c	27 Jun 2025 09:25:57 -0000	1.38
> +++ net/if_veb.c	27 Jun 2025 17:33:10 -0000
> @@ -948,6 +948,9 @@ veb_ipsec_out(struct ifnet *ifp0, struct
>  static struct mbuf *
>  veb_offload(struct ifnet *ifp, struct ifnet *ifp0, struct mbuf *m)
>  {
> +	struct ether_extracted ext;
> +	int csum = 0;
> +
>  #if NVLAN > 0
>  	if (ISSET(m->m_flags, M_VLANTAG) &&
>  	    !ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING)) {
> @@ -956,14 +959,56 @@ veb_offload(struct ifnet *ifp, struct if
>  		 * support, inject one in software.
>  		 */
>  		m = vlan_inject(m, ETHERTYPE_VLAN, m->m_pkthdr.ether_vtag);
> -		if (m == NULL) {
> -			counters_inc(ifp->if_counters, ifc_ierrors);
> -			return NULL;
> -		}
> +		if (m == NULL)
> +			goto drop;
>  	}
>  #endif
>  
> +	if (ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) &&
> +	    !ISSET(ifp0->if_capabilities, IFCAP_CSUM_IPv4))
> +		csum = 1;
> +
> +	if (ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT) &&
> +	    (!ISSET(ifp0->if_capabilities, IFCAP_CSUM_TCPv4) ||
> +	     !ISSET(ifp0->if_capabilities, IFCAP_CSUM_TCPv6)))
> +		csum = 1;
> +
> +	if (ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT) &&
> +	    (!ISSET(ifp0->if_capabilities, IFCAP_CSUM_UDPv4) ||
> +	     !ISSET(ifp0->if_capabilities, IFCAP_CSUM_UDPv6)))
> +		csum = 1;
> +
> +	if (csum) {
> +		int adjlen;
> +
> +		ether_extract_headers(m, &ext);
> +		if (ext.eh)
> +			adjlen = sizeof *ext.eh;
> +		else if (ext.evh)
> +			adjlen = sizeof *ext.evh;
> +		else
> +			goto drop;
> +
> +		m_adj(m, adjlen);
> +
> +		if (ext.ip4) {
> +			in_hdr_cksum_out(m, ifp0);
> +			in_proto_cksum_out(m, ifp0);
> +#ifdef INET6
> +		} else if (ext.ip6) {
> +			in6_proto_cksum_out(m, ifp0);
> +#endif
> +		}
> +		m = m_prepend(m, adjlen, 0);
> +		if (m == NULL)
> +			goto drop;
> +	}
> +
>  	return m;
> +
> + drop:
> +	counters_inc(ifp->if_counters, ifc_ierrors);
> +	return NULL;
>  }
>  
>  static void
> @@ -2380,7 +2425,15 @@ vport_clone_create(struct if_clone *ifc,
>  	ifp->if_qstart = vport_start;
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>  	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
> -	ifp->if_capabilities = IFCAP_VLAN_HWTAGGING;
> +
> +	ifp->if_capabilities = 0;
> +#if NVLAN > 0
> +	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> +#endif
> +	ifp->if_capabilities |= IFCAP_CSUM_IPv4;
> +	ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
> +	ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> +
>  	ether_fakeaddr(ifp);
>  
>  	if_counters_alloc(ifp);
> 
>