Index | Thread | Search

From:
Dave Voutila <dv@sisu.io>
Subject:
Re: vmd: add checksum offload for guests
To:
Mike Larkin <mlarkin@nested.page>
Cc:
Jan Klemkow <j.klemkow@wemelug.de>, David Gwynne <david@gwynne.id.au>, Klemens Nanni <kn@openbsd.org>, Alexander Bluhm <bluhm@openbsd.org>, tech@openbsd.org
Date:
Sat, 17 Jan 2026 11:38:50 -0500

Download raw body.

Thread
Mike Larkin <mlarkin@nested.page> writes:

> On Fri, Jan 16, 2026 at 07:38:16PM +0100, Jan Klemkow wrote:
>> On Thu, Jan 15, 2026 at 02:08:43PM -0800, Mike Larkin wrote:
>> > Does this "just work" no matter what guests I run? That's really all I care
>> > about.
>>
>> Here is my current diff for checksum offloading in vmd(8).
>>
>> I tested the following combination of features:
>>
>>  - Debian/Linux and OpenBSD-current guests
>>  - OpenBSD-current vio(4) w/o all offloading features
>>  - Linux, OpenBSD and Hostsystem via veb(4) and vlan(4)
>>  - IPv4 and IPv6 with tcpbench(1)
>>  - local interface locked lladdr
>>  - local interface dhcp
>>
>> Further tests are welcome!
>>
>> ok?
>>
>> Thanks,
>> Jan
>>
>
> Not sure about dv@, but I can't really review this. it's hundreds of lines
> of changes in vmd vionet that require a level of understanding of tap(4) and
> in virtio/vionet (and the network stack in general) that I don't have.
> When I did the original vionet in vmd years ago it was pretty straightforward
> since the spec (for *all* virtio) was only like 20 pages. I was able to write
> that code in a weekend. now that we have bolted on all this other stuff, I
> don't feel comfortable giving oks in this area anymore since there is no way
> I can look at this and know if it's right or not. I think you need a network
> stack person to ok this, *and* explain what the ramifications are for vmd
> in general. It looks like vmd is doing inspection of every packet now? I
> don't think we want that.

I've spent time digging into this and better understand it now. I'm also
happy now with how the current diff isn't expanding pledges for vionet.

It feels overkill to have to poke every packet, but I do manage to see a
small improvement in the one test I did using iperf3 sending from host
to guest. It's only about 1-2% gain in throughput on my Intel x1c gen10
and less than 1% on my newer Ryzen AI 350 machine. (This was using a
-current snapshot for the guest.)

I did this both with the "local interface" (where we already inspect
each packet to intercept DHCP packets) and one added to a veb(4) device
with an accompanying host-side vport(4).

My hypothesis is the gain is mostly due to offloading work from the
single-vcpu guest to the host vionet tx or rx threads.

Is it worth it? Especially knowing we're technically shortcutting the
actual spec as written by attesting that every packet checksum is
good? /shrug

Does someone have a better benchmark showing this moves the needle?

>
> -ml
>
>> Index: sys/kern/kern_pledge.c
>> ===================================================================
>> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
>> diff -u -p -r1.335 kern_pledge.c
>> --- sys/kern/kern_pledge.c	13 Nov 2025 20:59:14 -0000	1.335
>> +++ sys/kern/kern_pledge.c	16 Jan 2026 18:24:49 -0000
>> @@ -46,6 +46,7 @@
>>  #include <net/route.h>
>>  #include <net/if.h>
>>  #include <net/if_var.h>
>> +#include <net/if_tun.h>
>>  #include <netinet/in.h>
>>  #include <netinet6/in6_var.h>
>>  #include <netinet6/nd6.h>
>> @@ -1337,6 +1338,12 @@ pledge_ioctl(struct proc *p, long com, s
>>  		    cdevsw[major(vp->v_rdev)].d_open == vmmopen) {
>>  			error = pledge_ioctl_vmm(p, com);
>>  			if (error == 0)
>> +				return 0;
>> +		}
>> +		if ((fp->f_type == DTYPE_VNODE) &&
>> +		    (vp->v_type == VCHR) &&
>> +		    (cdevsw[major(vp->v_rdev)].d_open == tapopen)) {
>> +			if (com == TUNSCAP)
>>  				return 0;
>>  		}
>>  	}
>> Index: usr.sbin/vmd/vionet.c
>> ===================================================================
>> RCS file: /cvs/src/usr.sbin/vmd/vionet.c,v
>> diff -u -p -r1.29 vionet.c
>> --- usr.sbin/vmd/vionet.c	14 Jan 2026 03:09:05 -0000	1.29
>> +++ usr.sbin/vmd/vionet.c	16 Jan 2026 18:24:50 -0000
>> @@ -22,7 +22,12 @@
>>  #include <dev/pv/virtioreg.h>
>>
>>  #include <net/if.h>
>> +#include <net/if_tun.h>
>>  #include <netinet/in.h>
>> +#include <netinet/ip.h>
>> +#include <netinet/ip6.h>
>> +#include <netinet/tcp.h>
>> +#include <netinet/udp.h>
>>  #include <netinet/if_ether.h>
>>
>>  #include <errno.h>
>> @@ -50,6 +55,7 @@
>>
>>  #define VIRTIO_NET_CONFIG_MAC		 0 /*  8 bit x 6 byte */
>>
>> +#define VIRTIO_NET_F_GUEST_CSUM	(1 << 1)
>>  #define VIRTIO_NET_F_MAC	(1 << 5)
>>  #define RXQ	0
>>  #define TXQ	1
>> @@ -65,7 +71,7 @@ static void *rx_run_loop(void *);
>>  static void *tx_run_loop(void *);
>>  static int vionet_rx(struct virtio_dev *, int);
>>  static ssize_t vionet_rx_copy(struct vionet_dev *, int, const struct iovec *,
>> -    int, size_t);
>> +    int, size_t, struct tun_hdr *th);
>>  static ssize_t vionet_rx_zerocopy(struct vionet_dev *, int,
>>      const struct iovec *, int);
>>  static void vionet_rx_event(int, short, void *);
>> @@ -84,6 +90,10 @@ static void read_pipe_rx(int, short, voi
>>  static void read_pipe_tx(int, short, void *);
>>  static void vionet_assert_pic_irq(struct virtio_dev *);
>>  static void vionet_deassert_pic_irq(struct virtio_dev *);
>> +static void vhdr2thdr(struct virtio_net_hdr *, struct tun_hdr *,
>> +    const struct iovec *, int);
>> +static void thdr2vhdr(struct tun_hdr *, struct virtio_net_hdr *,
>> +    const struct iovec *, int);
>>
>>  /* Device Globals */
>>  struct event ev_tap;
>> @@ -300,6 +310,30 @@ fail:
>>  }
>>
>>  /*
>> + * Update and sync offload features with tap(4).
>> + */
>> +static void
>> +vionet_update_offload(struct virtio_dev *dev)
>> +{
>> +	struct viodev_msg	msg;
>> +	int			ret;
>> +
>> +	memset(&msg, 0, sizeof(msg));
>> +	msg.irq = dev->irq;
>> +	msg.type = VIODEV_MSG_TUNSCAP;
>> +
>> +	if (dev->driver_feature & VIRTIO_NET_F_GUEST_CSUM) {
>> +		msg.data |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
>> +		msg.data |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
>> +	}
>> +
>> +	ret = imsg_compose_event2(&dev->async_iev, IMSG_DEVOP_MSG, 0, 0, -1,
>> +	    &msg, sizeof(msg), ev_base_main);
>> +	if (ret == -1)
>> +		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
>> +}
>> +
>> +/*
>>   * vionet_rx
>>   *
>>   * Pull packet from the provided fd and fill the receive-side virtqueue. We
>> @@ -321,6 +355,7 @@ vionet_rx(struct virtio_dev *dev, int fd
>>  	struct virtio_net_hdr *hdr = NULL;
>>  	struct virtio_vq_info *vq_info;
>>  	struct iovec *iov;
>> +	struct tun_hdr th;
>>  	int notify = 0;
>>  	ssize_t sz;
>>  	uint8_t status = 0;
>> @@ -351,8 +386,8 @@ vionet_rx(struct virtio_dev *dev, int fd
>>  			goto reset;
>>  		}
>>
>> -		iov = &iov_rx[0];
>> -		iov_cnt = 1;
>> +		iov = &iov_rx[1];
>> +		iov_cnt = 2;
>>
>>  		/*
>>  		 * First descriptor should be at least as large as the
>> @@ -373,7 +408,6 @@ vionet_rx(struct virtio_dev *dev, int fd
>>  		if (iov->iov_base == NULL)
>>  			goto reset;
>>  		hdr = iov->iov_base;
>> -		memset(hdr, 0, sizeof(struct virtio_net_hdr));
>>
>>  		/* Tweak the iovec to account for the virtio_net_hdr. */
>>  		iov->iov_len -= sizeof(struct virtio_net_hdr);
>> @@ -418,15 +452,15 @@ vionet_rx(struct virtio_dev *dev, int fd
>>  			goto reset;
>>  		}
>>
>> -		hdr->num_buffers = iov_cnt;
>> -
>>  		/*
>>  		 * If we're enforcing hardware address or handling an injected
>>  		 * packet, we need to use a copy-based approach.
>>  		 */
>> +		iov_rx[0].iov_base = &th;
>> +		iov_rx[0].iov_len = sizeof(th);
>>  		if (vionet->lockedmac || fd != vionet->data_fd)
>> -			sz = vionet_rx_copy(vionet, fd, iov_rx, iov_cnt,
>> -			    chain_len);
>> +			sz = vionet_rx_copy(vionet, fd, iov_rx + 1, iov_cnt - 1,
>> +			    chain_len, &th);
>>  		else
>>  			sz = vionet_rx_zerocopy(vionet, fd, iov_rx, iov_cnt);
>>  		if (sz == -1)
>> @@ -434,6 +468,9 @@ vionet_rx(struct virtio_dev *dev, int fd
>>  		if (sz == 0)	/* No packets, so bail out for now. */
>>  			break;
>>
>> +		thdr2vhdr(&th, hdr, iov_rx + 1, iov_cnt - 1);
>> +		hdr->num_buffers = iov_cnt - 1;
>> +
>>  		/*
>>  		 * Account for the prefixed header since it wasn't included
>>  		 * in the copy or zerocopy operations.
>> @@ -473,9 +510,9 @@ reset:
>>   */
>>  ssize_t
>>  vionet_rx_copy(struct vionet_dev *dev, int fd, const struct iovec *iov,
>> -    int iov_cnt, size_t chain_len)
>> +    int iov_cnt, size_t chain_len, struct tun_hdr *th)
>>  {
>> -	static uint8_t		 buf[VIONET_HARD_MTU];
>> +	static uint8_t		 buf[sizeof(struct tun_hdr) + VIONET_HARD_MTU];
>>  	struct packet		*pkt = NULL;
>>  	struct ether_header	*eh = NULL;
>>  	uint8_t			*payload = buf;
>> @@ -483,9 +520,10 @@ vionet_rx_copy(struct vionet_dev *dev, i
>>  	ssize_t			 sz;
>>
>>  	/* If reading from the tap(4), try to right-size the read. */
>> -	if (fd == dev->data_fd)
>> -		nbytes = MIN(chain_len, VIONET_HARD_MTU);
>> -	else if (fd == pipe_inject[READ])
>> +	if (fd == dev->data_fd) {
>> +		nbytes = sizeof(struct tun_hdr) +
>> +		    MIN(chain_len, VIONET_HARD_MTU);
>> +	} else if (fd == pipe_inject[READ])
>>  		nbytes = sizeof(struct packet);
>>  	else {
>>  		log_warnx("%s: invalid fd: %d", __func__, fd);
>> @@ -504,10 +542,20 @@ vionet_rx_copy(struct vionet_dev *dev, i
>>  			return (-1);
>>  		}
>>  		return (0);
>> -	} else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) {
>> +	} else if (fd == dev->data_fd) {
>> +		if ((size_t)sz < sizeof(struct tun_hdr)) {
>> +			log_warnx("%s: short tun_hdr", __func__);
>> +			return (0);
>> +		}
>> +		memcpy(th, payload, sizeof *th);
>> +		payload += sizeof(struct tun_hdr);
>> +		sz -= sizeof(struct tun_hdr);
>> +
>>  		/* If reading the tap(4), we should get valid ethernet. */
>> -		log_warnx("%s: invalid packet size", __func__);
>> -		return (0);
>> +		if (sz < VIONET_MIN_TXLEN) {
>> +			log_warnx("%s: invalid packet size", __func__);
>> +			return (0);
>> +		}
>>  	} else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) {
>>  		log_warnx("%s: invalid injected packet object (sz=%ld)",
>>  		    __func__, sz);
>> @@ -526,6 +574,7 @@ vionet_rx_copy(struct vionet_dev *dev, i
>>  			log_warnx("%s: invalid injected packet size", __func__);
>>  			goto drop;
>>  		}
>> +		memset(th, 0, sizeof *th);
>>  		payload = pkt->buf;
>>  		sz = (ssize_t)pkt->len;
>>  	}
>> @@ -585,6 +634,12 @@ vionet_rx_zerocopy(struct vionet_dev *de
>>  	sz = readv(fd, iov, iov_cnt);
>>  	if (sz == -1 && errno == EAGAIN)
>>  		return (0);
>> +
>> +	if ((size_t)sz < sizeof(struct tun_hdr))
>> +		return (0);
>> +
>> +	sz -= sizeof(struct tun_hdr);
>> +
>>  	return (sz);
>>  }
>>
>> @@ -666,6 +721,8 @@ vionet_tx(struct virtio_dev *dev)
>>  	struct iovec *iov;
>>  	struct packet pkt;
>>  	uint8_t status = 0;
>> +	struct virtio_net_hdr *vhp;
>> +	struct tun_hdr th;
>>
>>  	status = dev->status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK;
>>  	if (status != VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK) {
>> @@ -692,8 +749,10 @@ vionet_tx(struct virtio_dev *dev)
>>  			goto reset;
>>  		}
>>
>> -		iov = &iov_tx[0];
>> -		iov_cnt = 0;
>> +		/* the 0th slot will be used by the tun_hdr */
>> +
>> +		iov = &iov_tx[1];
>> +		iov_cnt = 1;
>>  		chain_len = 0;
>>
>>  		/*
>> @@ -704,13 +763,16 @@ vionet_tx(struct virtio_dev *dev)
>>  			log_warnx("%s: invalid descriptor length", __func__);
>>  			goto reset;
>>  		}
>> -		iov->iov_len = desc->len;
>>
>> -		if (iov->iov_len > sizeof(struct virtio_net_hdr)) {
>> -			/* Chop off the virtio header, leaving packet data. */
>> -			iov->iov_len -= sizeof(struct virtio_net_hdr);
>> -			iov->iov_base = hvaddr_mem(desc->addr +
>> -			    sizeof(struct virtio_net_hdr), iov->iov_len);
>> +		/* Chop the virtio net header off */
>> +		vhp = hvaddr_mem(desc->addr, sizeof(*vhp));
>> +		if (vhp == NULL)
>> +			goto reset;
>> +
>> +		iov->iov_len = desc->len - sizeof(*vhp);
>> +		if (iov->iov_len > 0) {
>> +			iov->iov_base = hvaddr_mem(desc->addr + sizeof(*vhp),
>> +			    iov->iov_len);
>>  			if (iov->iov_base == NULL)
>>  				goto reset;
>>
>> @@ -758,7 +820,7 @@ vionet_tx(struct virtio_dev *dev)
>>  		 * descriptor with packet data contains a large enough buffer
>>  		 * for this inspection.
>>  		 */
>> -		iov = &iov_tx[0];
>> +		iov = &iov_tx[1];
>>  		if (vionet->lockedmac) {
>>  			if (iov->iov_len < ETHER_HDR_LEN) {
>>  				log_warnx("%s: insufficient header data",
>> @@ -784,6 +846,15 @@ vionet_tx(struct virtio_dev *dev)
>>  			}
>>  		}
>>
>> +		/*
>> +		 * if we look at more of vhp we might need to copy
>> +		 * it so it's aligned properly
>> +		 */
>> +		vhdr2thdr(vhp, &th, iov_tx + 1, iov_cnt - 1);
>> +
>> +		iov_tx[0].iov_base = &th;
>> +		iov_tx[0].iov_len = sizeof(th);
>> +
>>  		/* Write our packet to the tap(4). */
>>  		sz = writev(vionet->data_fd, iov_tx, iov_cnt);
>>  		if (sz == -1 && errno != ENOBUFS) {
>> @@ -1114,6 +1185,7 @@ vionet_cfg_write(struct virtio_dev *dev,
>>  		dev->driver_feature &= dev->device_feature;
>>  		DPRINTF("%s: driver features 0x%llx", __func__,
>>  		    dev->driver_feature);
>> +		vionet_update_offload(dev);
>>  		break;
>>  	case VIO1_PCI_CONFIG_MSIX_VECTOR:
>>  		/* Ignore until we support MSIX. */
>> @@ -1555,6 +1627,155 @@ vionet_assert_pic_irq(struct virtio_dev
>>  	    &msg, sizeof(msg), ev_base_main);
>>  	if (ret == -1)
>>  		log_warnx("%s: failed to assert irq %d", __func__, dev->irq);
>> +}
>> +
>> +static int
>> +memcpyv(void *buf, size_t len, size_t off, const struct iovec *iov, int iovcnt)
>> +{
>> +	uint8_t *dst = buf;
>> +	size_t l;
>> +
>> +	for (;;) {
>> +		if (iovcnt == 0)
>> +			return (-1);
>> +
>> +		if (off < iov->iov_len)
>> +			break;
>> +
>> +		off -= iov->iov_len;
>> +		iov++;
>> +		iovcnt--;
>> +	}
>> +
>> +	l = off + len;
>> +	if (l > iov->iov_len)
>> +		l = iov->iov_len;
>> +	l -= off;
>> +
>> +	memcpy(dst, (const uint8_t *)iov->iov_base + off, l);
>> +	dst += l;
>> +	len -= l;
>> +
>> +	if (len == 0)
>> +		return (0);
>> +
>> +	for (;;) {
>> +		if (iovcnt == 0)
>> +			return (-1);
>> +
>> +		l = len;
>> +		if (l > iov->iov_len)
>> +			l = iov->iov_len;
>> +
>> +		memcpy(dst, (const uint8_t *)iov->iov_base, l);
>> +		dst += l;
>> +		len -= l;
>> +
>> +		if (len == 0)
>> +			break;
>> +
>> +		iov++;
>> +		iovcnt--;
>> +	}
>> +
>> +	return (0);
>> +}
>> +
>> +static void
>> +hdr_extract(const struct iovec *iov, int iovcnt, size_t *off, uint8_t *proto)
>> +{
>> +	size_t		offs;
>> +	uint16_t	etype;
>> +
>> +	if (memcpyv(&etype, sizeof(etype),
>> +	    offsetof(struct ether_header, ether_type),
>> +	    iov, iovcnt) == -1)
>> +		return;
>> +
>> +	*off = sizeof(struct ether_header);
>> +
>> +	if (etype == htons(ETHERTYPE_VLAN)) {
>> +		if (memcpyv(&etype, sizeof(etype),
>> +		    offsetof(struct ether_vlan_header, evl_proto),
>> +		    iov, iovcnt) == -1)
>> +			return;
>> +
>> +		*off = sizeof(struct ether_vlan_header);
>> +	}
>> +
>> +	if (etype == htons(ETHERTYPE_IP)) {
>> +		uint8_t hl;
>> +
>> +		/* Get ipproto field from IP header. */
>> +		offs = *off + offsetof(struct ip, ip_p);
>> +		if (memcpyv(proto, sizeof(*proto), offs, iov, iovcnt) == -1)
>> +			return;
>> +
>> +		/* Get IP header length field from IP header. */
>> +		offs = *off;
>> +		if (memcpyv(&hl, sizeof(hl), offs, iov, iovcnt) == -1)
>> +			return;
>> +
>> +		*off += (hl & 0x0f) << 2;
>> +	} else if (etype == htons(ETHERTYPE_IPV6)) {
>> +		/* Get next header field from IP header. */
>> +		offs = *off + offsetof(struct ip6_hdr, ip6_nxt);
>> +		if (memcpyv(proto, sizeof(*proto), offs, iov, iovcnt) == -1)
>> +			return;
>> +
>> +		*off += sizeof(struct ip6_hdr);
>> +	}
>> +}
>> +
>> +static void
>> +vhdr2thdr(struct virtio_net_hdr *vh, struct tun_hdr *th,
>> +    const struct iovec *iov, int iovcnt)
>> +{
>> +	memset(th, 0, sizeof(*th));
>> +
>> +	if (vh->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
>> +		size_t	off;
>> +		uint8_t	proto;
>> +
>> +		hdr_extract(iov, iovcnt, &off, &proto);
>> +
>> +		switch (proto) {
>> +		case IPPROTO_TCP:
>> +			th->th_flags |= TUN_H_TCP_CSUM;
>> +			break;
>> +
>> +		case IPPROTO_UDP:
>> +			th->th_flags |= TUN_H_UDP_CSUM;
>> +			break;
>> +		}
>> +	}
>> +}
>> +
>> +static void
>> +thdr2vhdr(struct tun_hdr *th, struct virtio_net_hdr *vh,
>> +    const struct iovec *iov, int iovcnt)
>> +{
>> +	size_t	off;
>> +	uint8_t	proto;
>> +
>> +	memset(vh, 0, sizeof(*vh));
>> +
>> +	if (th->th_flags & (TUN_H_TCP_CSUM | TUN_H_UDP_CSUM)) {
>> +		hdr_extract(iov, iovcnt, &off, &proto);
>> +
>> +		vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
>> +		vh->csum_start = off;
>> +
>> +		switch (proto) {
>> +		case IPPROTO_TCP:
>> +			vh->csum_offset = offsetof(struct tcphdr, th_sum);
>> +			break;
>> +
>> +		case IPPROTO_UDP:
>> +			vh->csum_offset = offsetof(struct udphdr, uh_sum);
>> +			break;
>> +		}
>> +	}
>>  }
>>
>>  /*
>> Index: usr.sbin/vmd/virtio.c
>> ===================================================================
>> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
>> diff -u -p -r1.134 virtio.c
>> --- usr.sbin/vmd/virtio.c	14 Jan 2026 03:09:05 -0000	1.134
>> +++ usr.sbin/vmd/virtio.c	16 Jan 2026 18:24:50 -0000
>> @@ -19,6 +19,7 @@
>>  #include <sys/param.h>	/* PAGE_SIZE */
>>  #include <sys/socket.h>
>>  #include <sys/wait.h>
>> +#include <sys/ioctl.h>
>>
>>  #include <dev/pci/pcireg.h>
>>  #include <dev/pci/pcidevs.h>
>> @@ -28,6 +29,7 @@
>>  #include <dev/vmm/vmm.h>
>>
>>  #include <net/if.h>
>> +#include <net/if_tun.h>
>>  #include <netinet/in.h>
>>  #include <netinet/if_ether.h>
>>
>> @@ -64,6 +66,8 @@ SLIST_HEAD(virtio_dev_head, virtio_dev)
>>
>>  #define MAXPHYS	(64 * 1024)	/* max raw I/O transfer size */
>>
>> +#define VIRTIO_NET_F_CSUM	(1<<0)
>> +#define VIRTIO_NET_F_GUEST_CSUM	(1<<1)
>>  #define VIRTIO_NET_F_MAC	(1<<5)
>>
>>  #define VMMCI_F_TIMESYNC	(1<<0)
>> @@ -1020,6 +1024,8 @@ virtio_init(struct vmd_vm *vm, int child
>>  	/* Virtio 1.x Network Devices */
>>  	if (vmc->vmc_nnics > 0) {
>>  		for (i = 0; i < vmc->vmc_nnics; i++) {
>> +			struct tun_capabilities	tcap;
>> +
>>  			dev = malloc(sizeof(struct virtio_dev));
>>  			if (dev == NULL) {
>>  				log_warn("calloc failure allocating vionet");
>> @@ -1034,7 +1040,8 @@ virtio_init(struct vmd_vm *vm, int child
>>  			}
>>  			virtio_dev_init(vm, dev, id, VIONET_QUEUE_SIZE_DEFAULT,
>>  			    VIRTIO_NET_QUEUES,
>> -			    (VIRTIO_NET_F_MAC | VIRTIO_F_VERSION_1));
>> +			    (VIRTIO_NET_F_MAC | VIRTIO_NET_F_CSUM |
>> +				VIRTIO_NET_F_GUEST_CSUM | VIRTIO_F_VERSION_1));
>>
>>  			if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_pci_io,
>>  			    dev) == -1) {
>> @@ -1056,6 +1063,14 @@ virtio_init(struct vmd_vm *vm, int child
>>  			dev->vmm_id = vm->vm_vmmid;
>>  			dev->vionet.data_fd = child_taps[i];
>>
>> +			/*
>> +			 * IFCAPs are tweaked after feature negotiation with
>> +			 * the guest later.
>> +			 */
>> +			memset(&tcap, 0, sizeof(tcap));
>> +			if (ioctl(dev->vionet.data_fd, TUNSCAP, &tcap) == -1)
>> +				fatal("tap(4) TUNSCAP");
>> +
>>  			/* MAC address has been assigned by the parent */
>>  			memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6);
>>  			dev->vionet.lockedmac =
>> @@ -1532,10 +1547,12 @@ virtio_dev_launch(struct vmd_vm *vm, str
>>  		}
>>
>>  		/* Close data fds. Only the child device needs them now. */
>> -		if (virtio_dev_closefds(dev) == -1) {
>> -			log_warnx("%s: failed to close device data fds",
>> -			    __func__);
>> -			goto err;
>> +		if (dev->dev_type != VMD_DEVTYPE_NET) {
>> +			if (virtio_dev_closefds(dev) == -1) {
>> +				log_warnx("%s: failed to close device data fds",
>> +				    __func__);
>> +				goto err;
>> +			}
>>  		}
>>
>>  		/* 2. Send over details on the VM (including memory fds). */
>> @@ -1758,6 +1775,18 @@ handle_dev_msg(struct viodev_msg *msg, s
>>  	case VIODEV_MSG_ERROR:
>>  		log_warnx("%s: device reported error", __func__);
>>  		break;
>> +	case VIODEV_MSG_TUNSCAP:
>> +	{
>> +		struct tun_capabilities	tcap;
>> +
>> +		memset(&tcap, 0, sizeof(tcap));
>> +		tcap.tun_if_capabilities = msg->data;
>> +
>> +		if (ioctl(gdev->vionet.data_fd, TUNSCAP, &tcap) == -1)
>> +			fatal("%s: tap(4) TUNSCAP", __func__);
>> +
>> +		break;
>> +	}
>>  	case VIODEV_MSG_INVALID:
>>  	case VIODEV_MSG_IO_READ:
>>  	case VIODEV_MSG_IO_WRITE:
>> Index: usr.sbin/vmd/virtio.h
>> ===================================================================
>> RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v
>> diff -u -p -r1.60 virtio.h
>> --- usr.sbin/vmd/virtio.h	14 Jan 2026 03:09:05 -0000	1.60
>> +++ usr.sbin/vmd/virtio.h	16 Jan 2026 18:24:50 -0000
>> @@ -134,6 +134,7 @@ struct viodev_msg {
>>  #define VIODEV_MSG_IO_WRITE	5
>>  #define VIODEV_MSG_DUMP		6
>>  #define VIODEV_MSG_SHUTDOWN	7
>> +#define VIODEV_MSG_TUNSCAP	8
>>
>>  	uint16_t reg;		/* VirtIO register */
>>  	uint8_t io_sz;		/* IO instruction size */
>> @@ -309,6 +310,9 @@ struct virtio_net_hdr {
>>  	uint16_t padding_reserved;
>>  	*/
>>  };
>> +
>> +#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1 /* flags */
>> +#define VIRTIO_NET_HDR_F_DATA_VALID	2 /* flags */
>>
>>  enum vmmci_cmd {
>>  	VMMCI_NONE = 0,