From: Jan Klemkow Subject: vmd(8): TCP/UDP checksum offloading for guests To: tech@openbsd.org Date: Fri, 16 Aug 2024 14:51:06 +0200 Hi, This diff introduces the transmit side of TCP/UDP checksum offloading for vmd(8) guests. Its implementation is similar to those in FreeBSD and Linux. vmd(8) tells tap(4) via a new ioctl to expect virtio net header structs. These are used to transport offloading feature bits between the guest and the tap(4) device. tap(4) translates the feature bits between the virtio net header and our mbufs. This diff just contains the sending part, to keep it short. Once both parts are implemented, we will never calculate a checksum for packets between our guests or the host system. As described in the virtio-network specification: A network packet transported between two guests on the same system might not need checksumming at all, nor segmentation, if both guests are amenable. This behavior is similar to our loopback traffic, as we also don't calculate checksums of packets between two processes here. Tests and any kind of feedback are welcome! bye, Jan Index: sys/dev/pv/if_vio.c =================================================================== RCS file: /cvs/src/sys/dev/pv/if_vio.c,v diff -u -p -r1.45 if_vio.c --- sys/dev/pv/if_vio.c 1 Aug 2024 11:13:19 -0000 1.45 +++ sys/dev/pv/if_vio.c 16 Aug 2024 08:47:15 -0000 @@ -59,154 +59,7 @@ #define DPRINTF(x...) 
#endif -/* - * if_vioreg.h: - */ -/* Configuration registers */ -#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ -#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ - -/* Feature bits */ -#define VIRTIO_NET_F_CSUM (1ULL<<0) -#define VIRTIO_NET_F_GUEST_CSUM (1ULL<<1) -#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (1ULL<<2) -#define VIRTIO_NET_F_MTU (1ULL<<3) -#define VIRTIO_NET_F_MAC (1ULL<<5) -#define VIRTIO_NET_F_GSO (1ULL<<6) -#define VIRTIO_NET_F_GUEST_TSO4 (1ULL<<7) -#define VIRTIO_NET_F_GUEST_TSO6 (1ULL<<8) -#define VIRTIO_NET_F_GUEST_ECN (1ULL<<9) -#define VIRTIO_NET_F_GUEST_UFO (1ULL<<10) -#define VIRTIO_NET_F_HOST_TSO4 (1ULL<<11) -#define VIRTIO_NET_F_HOST_TSO6 (1ULL<<12) -#define VIRTIO_NET_F_HOST_ECN (1ULL<<13) -#define VIRTIO_NET_F_HOST_UFO (1ULL<<14) -#define VIRTIO_NET_F_MRG_RXBUF (1ULL<<15) -#define VIRTIO_NET_F_STATUS (1ULL<<16) -#define VIRTIO_NET_F_CTRL_VQ (1ULL<<17) -#define VIRTIO_NET_F_CTRL_RX (1ULL<<18) -#define VIRTIO_NET_F_CTRL_VLAN (1ULL<<19) -#define VIRTIO_NET_F_CTRL_RX_EXTRA (1ULL<<20) -#define VIRTIO_NET_F_GUEST_ANNOUNCE (1ULL<<21) -#define VIRTIO_NET_F_MQ (1ULL<<22) -#define VIRTIO_NET_F_CTRL_MAC_ADDR (1ULL<<23) -#define VIRTIO_NET_F_HOST_USO (1ULL<<56) -#define VIRTIO_NET_F_HASH_REPORT (1ULL<<57) -#define VIRTIO_NET_F_GUEST_HDRLEN (1ULL<<59) -#define VIRTIO_NET_F_RSS (1ULL<<60) -#define VIRTIO_NET_F_RSC_EXT (1ULL<<61) -#define VIRTIO_NET_F_STANDBY (1ULL<<62) -#define VIRTIO_NET_F_SPEED_DUPLEX (1ULL<<63) -/* - * Config(8) flags. The lowest byte is reserved for generic virtio stuff. 
- */ - -/* Workaround for vlan related bug in qemu < version 2.0 */ -#define CONFFLAG_QEMU_VLAN_BUG (1<<8) - -static const struct virtio_feature_name virtio_net_feature_names[] = { -#if VIRTIO_DEBUG - { VIRTIO_NET_F_CSUM, "CSum" }, - { VIRTIO_NET_F_GUEST_CSUM, "GuestCSum" }, - { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlGuestOffl" }, - { VIRTIO_NET_F_MTU, "MTU", }, - { VIRTIO_NET_F_MAC, "MAC" }, - { VIRTIO_NET_F_GSO, "GSO" }, - { VIRTIO_NET_F_GUEST_TSO4, "GuestTSO4" }, - { VIRTIO_NET_F_GUEST_TSO6, "GuestTSO6" }, - { VIRTIO_NET_F_GUEST_ECN, "GuestECN" }, - { VIRTIO_NET_F_GUEST_UFO, "GuestUFO" }, - { VIRTIO_NET_F_HOST_TSO4, "HostTSO4" }, - { VIRTIO_NET_F_HOST_TSO6, "HostTSO6" }, - { VIRTIO_NET_F_HOST_ECN, "HostECN" }, - { VIRTIO_NET_F_HOST_UFO, "HostUFO" }, - { VIRTIO_NET_F_MRG_RXBUF, "MrgRXBuf" }, - { VIRTIO_NET_F_STATUS, "Status" }, - { VIRTIO_NET_F_CTRL_VQ, "CtrlVQ" }, - { VIRTIO_NET_F_CTRL_RX, "CtrlRX" }, - { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLAN" }, - { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRXExtra" }, - { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, - { VIRTIO_NET_F_MQ, "MQ" }, - { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMAC" }, - { VIRTIO_NET_F_HOST_USO, "HostUso" }, - { VIRTIO_NET_F_HASH_REPORT, "HashRpt" }, - { VIRTIO_NET_F_GUEST_HDRLEN, "GuestHdrlen" }, - { VIRTIO_NET_F_RSS, "RSS" }, - { VIRTIO_NET_F_RSC_EXT, "RSSExt" }, - { VIRTIO_NET_F_STANDBY, "Stdby" }, - { VIRTIO_NET_F_SPEED_DUPLEX, "SpdDplx" }, -#endif - { 0, NULL } -}; - -/* Status */ -#define VIRTIO_NET_S_LINK_UP 1 - -/* Packet header structure */ -struct virtio_net_hdr { - uint8_t flags; - uint8_t gso_type; - uint16_t hdr_len; - uint16_t gso_size; - uint16_t csum_start; - uint16_t csum_offset; - - /* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */ - uint16_t num_buffers; -} __packed; - -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */ -#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* flags */ -#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */ -#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */ -#define 
VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */ -#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */ -#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */ - -#define VIRTIO_NET_MAX_GSO_LEN (65536+ETHER_HDR_LEN) - -/* Control virtqueue */ -struct virtio_net_ctrl_cmd { - uint8_t class; - uint8_t command; -} __packed; -#define VIRTIO_NET_CTRL_RX 0 -# define VIRTIO_NET_CTRL_RX_PROMISC 0 -# define VIRTIO_NET_CTRL_RX_ALLMULTI 1 - -#define VIRTIO_NET_CTRL_MAC 1 -# define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 - -#define VIRTIO_NET_CTRL_VLAN 2 -# define VIRTIO_NET_CTRL_VLAN_ADD 0 -# define VIRTIO_NET_CTRL_VLAN_DEL 1 - -#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 -# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 - -struct virtio_net_ctrl_status { - uint8_t ack; -} __packed; -#define VIRTIO_NET_OK 0 -#define VIRTIO_NET_ERR 1 - -struct virtio_net_ctrl_rx { - uint8_t onoff; -} __packed; - -struct virtio_net_ctrl_guest_offloads { - uint64_t offloads; -} __packed; - -struct virtio_net_ctrl_mac_tbl { - uint32_t nentries; - uint8_t macs[][ETHER_ADDR_LEN]; -} __packed; - -struct virtio_net_ctrl_vlan { - uint16_t id; -} __packed; +#include "if_vioreg.h" /* * if_viovar.h: Index: sys/dev/pv/if_vioreg.h =================================================================== RCS file: sys/dev/pv/if_vioreg.h diff -N sys/dev/pv/if_vioreg.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/dev/pv/if_vioreg.h 16 Aug 2024 08:47:15 -0000 @@ -0,0 +1,182 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg. + * Copyright (c) 2010 Minoura Makoto. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Configuration registers */ +#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ + +/* Feature bits */ +#define VIRTIO_NET_F_CSUM (1ULL<<0) +#define VIRTIO_NET_F_GUEST_CSUM (1ULL<<1) +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS (1ULL<<2) +#define VIRTIO_NET_F_MTU (1ULL<<3) +#define VIRTIO_NET_F_MAC (1ULL<<5) +#define VIRTIO_NET_F_GSO (1ULL<<6) +#define VIRTIO_NET_F_GUEST_TSO4 (1ULL<<7) +#define VIRTIO_NET_F_GUEST_TSO6 (1ULL<<8) +#define VIRTIO_NET_F_GUEST_ECN (1ULL<<9) +#define VIRTIO_NET_F_GUEST_UFO (1ULL<<10) +#define VIRTIO_NET_F_HOST_TSO4 (1ULL<<11) +#define VIRTIO_NET_F_HOST_TSO6 (1ULL<<12) +#define VIRTIO_NET_F_HOST_ECN (1ULL<<13) +#define VIRTIO_NET_F_HOST_UFO (1ULL<<14) +#define VIRTIO_NET_F_MRG_RXBUF (1ULL<<15) +#define VIRTIO_NET_F_STATUS (1ULL<<16) +#define VIRTIO_NET_F_CTRL_VQ (1ULL<<17) +#define VIRTIO_NET_F_CTRL_RX (1ULL<<18) +#define VIRTIO_NET_F_CTRL_VLAN (1ULL<<19) +#define VIRTIO_NET_F_CTRL_RX_EXTRA (1ULL<<20) +#define VIRTIO_NET_F_GUEST_ANNOUNCE 
(1ULL<<21) +#define VIRTIO_NET_F_MQ (1ULL<<22) +#define VIRTIO_NET_F_CTRL_MAC_ADDR (1ULL<<23) +#define VIRTIO_NET_F_HOST_USO (1ULL<<56) +#define VIRTIO_NET_F_HASH_REPORT (1ULL<<57) +#define VIRTIO_NET_F_GUEST_HDRLEN (1ULL<<59) +#define VIRTIO_NET_F_RSS (1ULL<<60) +#define VIRTIO_NET_F_RSC_EXT (1ULL<<61) +#define VIRTIO_NET_F_STANDBY (1ULL<<62) +#define VIRTIO_NET_F_SPEED_DUPLEX (1ULL<<63) +/* + * Config(8) flags. The lowest byte is reserved for generic virtio stuff. + */ + +/* Workaround for vlan related bug in qemu < version 2.0 */ +#define CONFFLAG_QEMU_VLAN_BUG (1<<8) + +static const struct virtio_feature_name virtio_net_feature_names[] = { +#if VIRTIO_DEBUG + { VIRTIO_NET_F_CSUM, "CSum" }, + { VIRTIO_NET_F_GUEST_CSUM, "GuestCSum" }, + { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlGuestOffl" }, + { VIRTIO_NET_F_MTU, "MTU", }, + { VIRTIO_NET_F_MAC, "MAC" }, + { VIRTIO_NET_F_GSO, "GSO" }, + { VIRTIO_NET_F_GUEST_TSO4, "GuestTSO4" }, + { VIRTIO_NET_F_GUEST_TSO6, "GuestTSO6" }, + { VIRTIO_NET_F_GUEST_ECN, "GuestECN" }, + { VIRTIO_NET_F_GUEST_UFO, "GuestUFO" }, + { VIRTIO_NET_F_HOST_TSO4, "HostTSO4" }, + { VIRTIO_NET_F_HOST_TSO6, "HostTSO6" }, + { VIRTIO_NET_F_HOST_ECN, "HostECN" }, + { VIRTIO_NET_F_HOST_UFO, "HostUFO" }, + { VIRTIO_NET_F_MRG_RXBUF, "MrgRXBuf" }, + { VIRTIO_NET_F_STATUS, "Status" }, + { VIRTIO_NET_F_CTRL_VQ, "CtrlVQ" }, + { VIRTIO_NET_F_CTRL_RX, "CtrlRX" }, + { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLAN" }, + { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRXExtra" }, + { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, + { VIRTIO_NET_F_MQ, "MQ" }, + { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMAC" }, + { VIRTIO_NET_F_HOST_USO, "HostUso" }, + { VIRTIO_NET_F_HASH_REPORT, "HashRpt" }, + { VIRTIO_NET_F_GUEST_HDRLEN, "GuestHdrlen" }, + { VIRTIO_NET_F_RSS, "RSS" }, + { VIRTIO_NET_F_RSC_EXT, "RSSExt" }, + { VIRTIO_NET_F_STANDBY, "Stdby" }, + { VIRTIO_NET_F_SPEED_DUPLEX, "SpdDplx" }, +#endif + { 0, NULL } +}; + +/* Status */ +#define VIRTIO_NET_S_LINK_UP 1 + +/* Packet header structure */ 
+struct virtio_net_hdr { + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; + + /* only present if VIRTIO_NET_F_MRG_RXBUF is negotiated */ + uint16_t num_buffers; +} __packed; + +struct virtio_net_hdr_legacy { + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; +} __packed; + +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* flags */ +#define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */ + +#define VIRTIO_NET_MAX_GSO_LEN (65536+ETHER_HDR_LEN) + +/* Control virtqueue */ +struct virtio_net_ctrl_cmd { + uint8_t class; + uint8_t command; +} __packed; +#define VIRTIO_NET_CTRL_RX 0 +# define VIRTIO_NET_CTRL_RX_PROMISC 0 +# define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + +#define VIRTIO_NET_CTRL_MAC 1 +# define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + +#define VIRTIO_NET_CTRL_VLAN 2 +# define VIRTIO_NET_CTRL_VLAN_ADD 0 +# define VIRTIO_NET_CTRL_VLAN_DEL 1 + +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + +struct virtio_net_ctrl_status { + uint8_t ack; +} __packed; +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +struct virtio_net_ctrl_rx { + uint8_t onoff; +} __packed; + +struct virtio_net_ctrl_guest_offloads { + uint64_t offloads; +} __packed; + +struct virtio_net_ctrl_mac_tbl { + uint32_t nentries; + uint8_t macs[][ETHER_ADDR_LEN]; +} __packed; + +struct virtio_net_ctrl_vlan { + uint16_t id; +} __packed; Index: sys/net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v diff -u -p -r1.240 if_tun.c --- sys/net/if_tun.c 23 Dec 2023 10:52:54 -0000 1.240 +++ sys/net/if_tun.c 16 Aug 2024 
12:00:46 -0000 @@ -63,8 +63,15 @@ #include #include +#include +#include +#include +#include #include +#include +#include + #include "bpfilter.h" #if NBPFILTER > 0 #include @@ -92,6 +99,7 @@ struct tun_softc { dev_t sc_dev; struct refcnt sc_refs; unsigned int sc_reading; + int sc_vhdrlen; }; #ifdef TUN_DEBUG @@ -779,6 +787,17 @@ tun_dev_ioctl(dev_t dev, u_long cmd, voi bcopy(data, sc->sc_ac.ac_enaddr, sizeof(sc->sc_ac.ac_enaddr)); break; + case TAPSVNETHDR: + if (*(int *)data != 0 && + *(int *)data != sizeof(struct virtio_net_hdr) && + *(int *)data != sizeof(struct virtio_net_hdr_legacy)) { + error = EINVAL; + break; + } + + sc->sc_vhdrlen = *(int *)data; + + break; default: error = ENOTTY; break; @@ -828,6 +847,17 @@ tun_dev_read(dev_t dev, struct uio *uio, bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT); #endif + size_t len = ulmin(uio->uio_resid, sc->sc_vhdrlen); + if (len > 0) { + struct virtio_net_hdr vhdr; + + bzero(&vhdr, sizeof(vhdr)); + + error = uiomove(&vhdr, len, uio); + if (error != 0) + goto free; + } + m = m0; while (uio->uio_resid > 0) { size_t len = ulmin(uio->uio_resid, m->m_len); @@ -841,7 +871,7 @@ tun_dev_read(dev_t dev, struct uio *uio, if (m == NULL) break; } - +free: m_freem(m0); put: @@ -870,6 +900,7 @@ tun_dev_write(dev_t dev, struct uio *uio struct tun_softc *sc; struct ifnet *ifp; struct mbuf *m0; + struct virtio_net_hdr vhdr; int error = 0; size_t mlen; @@ -905,9 +936,31 @@ tun_dev_write(dev_t dev, struct uio *uio m0->m_pkthdr.len = m0->m_len = mlen; m_adj(m0, align); + if (sc->sc_vhdrlen > 0) { + error = uiomove(&vhdr, sc->sc_vhdrlen, uio); + if (error != 0) + goto drop; + } + error = uiomove(mtod(m0, void *), m0->m_len, uio); if (error != 0) goto drop; + + if (sc->sc_vhdrlen > 0) { + if (ISSET(vhdr.flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) { + struct ether_extracted ext; + + ether_extract_headers(m0, &ext); + + if (ext.tcp) { + SET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK); + SET(m0->m_pkthdr.csum_flags, M_TCP_CSUM_OUT); + } else if (ext.udp) 
{ + SET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_IN_OK); + SET(m0->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); + } + } + } NET_LOCK(); if_vinput(ifp, m0); Index: sys/net/if_tun.h =================================================================== RCS file: /cvs/src/sys/net/if_tun.h,v diff -u -p -r1.15 if_tun.h --- sys/net/if_tun.h 6 Feb 2007 10:49:40 -0000 1.15 +++ sys/net/if_tun.h 16 Aug 2024 12:00:43 -0000 @@ -68,4 +68,7 @@ struct tuninfo { #define TUNSDEBUG _IOW('t', 94, int) #define TUNGDEBUG _IOR('t', 95, int) +/* ioctl to set the virtio-net header size */ +#define TAPSVNETHDR _IOW('t', 96, int) + #endif /* _NET_IF_TUN_H_ */ Index: usr.sbin/vmd/vionet.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/vionet.c,v diff -u -p -r1.16 vionet.c --- usr.sbin/vmd/vionet.c 12 Jul 2024 14:34:08 -0000 1.16 +++ usr.sbin/vmd/vionet.c 16 Aug 2024 12:00:43 -0000 @@ -18,6 +18,7 @@ */ #include #include +#include #include #include @@ -46,11 +47,6 @@ extern char *__progname; extern struct vmd_vm *current_vm; -struct packet { - uint8_t *buf; - size_t len; -}; - static void *rx_run_loop(void *); static void *tx_run_loop(void *); static int vionet_rx(struct vionet_dev *, int); @@ -102,13 +98,7 @@ vionet_main(int fd, int fd_vmm) struct vm_create_params *vcp; ssize_t sz; int ret; - - /* - * stdio - needed for read/write to disk fds and channels to the vm. - * vmm + proc - needed to create shared vm mappings. - */ - if (pledge("stdio vmm proc", NULL) == -1) - fatal("pledge"); + int vhdrlen = sizeof(struct virtio_net_hdr); /* Initialize iovec arrays. */ memset(iov_rx, 0, sizeof(iov_rx)); @@ -133,6 +123,17 @@ vionet_main(int fd, int fd_vmm) ", vmm fd = %d", __func__, vionet->data_fd, dev.sync_fd, dev.async_fd, fd_vmm); + /* Activate virtio_net_hdr for tap(4) device. 
*/ + if (ioctl(vionet->data_fd, TAPSVNETHDR, &vhdrlen) == -1) + fatal("%s: TAPSVNETHDR option on tap(4) failed", __func__); + + /* + * stdio - needed for read/write to disk fds and channels to the vm. + * vmm + proc - needed to create shared vm mappings. + */ + if (pledge("stdio vmm proc", NULL) == -1) + fatal("pledge"); + /* Receive our vm information from the vm process. */ memset(&vm, 0, sizeof(vm)); sz = atomicio(read, dev.sync_fd, &vm, sizeof(vm)); @@ -413,6 +414,7 @@ vionet_rx(struct vionet_dev *dev, int fd iov = &iov_rx[0]; iov_cnt = 1; + chain_len = 0; /* * First descriptor should be at least as large as the @@ -425,22 +427,9 @@ vionet_rx(struct vionet_dev *dev, int fd goto reset; } - /* - * Insert the virtio_net_hdr and adjust len/base. We do the - * pointer math here before it's a void*. - */ iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len); if (iov->iov_base == NULL) goto reset; - memset(iov->iov_base, 0, sizeof(struct virtio_net_hdr)); - - /* Tweak the iovec to account for the virtio_net_hdr. */ - iov->iov_len -= sizeof(struct virtio_net_hdr); - iov->iov_base = hvaddr_mem(desc->addr + - sizeof(struct virtio_net_hdr), iov->iov_len); - if (iov->iov_base == NULL) - goto reset; - chain_len = iov->iov_len; /* * Walk the remaining chain and collect remaining addresses @@ -491,12 +480,6 @@ vionet_rx(struct vionet_dev *dev, int fd if (sz == 0) /* No packets, so bail out for now. */ break; - /* - * Account for the prefixed header since it wasn't included - * in the copy or zerocopy operations. - */ - sz += sizeof(struct virtio_net_hdr); - /* Mark our buffers as used. 
*/ used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_idx; used->ring[used->idx & VIONET_QUEUE_MASK].len = sz; @@ -533,22 +516,19 @@ vionet_rx_copy(struct vionet_dev *dev, i int iov_cnt, size_t chain_len) { static uint8_t buf[VIONET_HARD_MTU]; - struct packet *pkt = NULL; struct ether_header *eh = NULL; uint8_t *payload = buf; size_t i, chunk, nbytes, copied = 0; ssize_t sz; - /* If reading from the tap(4), try to right-size the read. */ - if (fd == dev->data_fd) - nbytes = MIN(chain_len, VIONET_HARD_MTU); - else if (fd == pipe_inject[READ]) - nbytes = sizeof(struct packet); - else { + if (fd != dev->data_fd && fd != pipe_inject[READ]) { log_warnx("%s: invalid fd: %d", __func__, fd); return (-1); } + /* If reading from the tap(4), try to right-size the read. */ + nbytes = MIN(chain_len, VIONET_HARD_MTU); + /* * Try to pull a packet. The fd should be non-blocking and we don't * care if we under-read (i.e. sz != nbytes) as we may not have a @@ -560,36 +540,18 @@ vionet_rx_copy(struct vionet_dev *dev, i log_warn("%s: error reading packet", __func__); return (-1); } + log_warn("%s: EAGAIN", __func__); return (0); - } else if (fd == dev->data_fd && sz < VIONET_MIN_TXLEN) { + } else if (fd == dev->data_fd && (size_t)sz < VIONET_MIN_TXLEN) { /* If reading the tap(4), we should get valid ethernet. */ log_warnx("%s: invalid packet size", __func__); return (0); - } else if (fd == pipe_inject[READ] && sz != sizeof(struct packet)) { - log_warnx("%s: invalid injected packet object (sz=%ld)", - __func__, sz); - return (0); - } - - /* Decompose an injected packet, if that's what we're working with. 
*/ - if (fd == pipe_inject[READ]) { - pkt = (struct packet *)buf; - if (pkt->buf == NULL) { - log_warnx("%s: invalid injected packet, no buffer", - __func__); - return (0); - } - if (sz < VIONET_MIN_TXLEN || sz > VIONET_MAX_TXLEN) { - log_warnx("%s: invalid injected packet size", __func__); - goto drop; - } - payload = pkt->buf; - sz = (ssize_t)pkt->len; } /* Validate the ethernet header, if required. */ if (dev->lockedmac) { - eh = (struct ether_header *)(payload); + eh = (struct ether_header *)(payload + + sizeof(struct virtio_net_hdr)); if (!ETHER_IS_MULTICAST(eh->ether_dhost) && memcmp(eh->ether_dhost, dev->mac, sizeof(eh->ether_dhost)) != 0) @@ -610,10 +572,6 @@ vionet_rx_copy(struct vionet_dev *dev, i } drop: - /* Free any injected packet buffer. */ - if (pkt != NULL) - free(pkt->buf); - return (copied); } @@ -724,7 +682,6 @@ vionet_tx(struct virtio_dev *dev) struct virtio_vq_info *vq_info; struct ether_header *eh; struct iovec *iov; - struct packet pkt; uint8_t status = 0; status = vionet->cfg.device_status @@ -766,17 +723,15 @@ vionet_tx(struct virtio_dev *dev) if (iov->iov_len < sizeof(struct virtio_net_hdr)) { log_warnx("%s: invalid descriptor length", __func__); goto reset; - } else if (iov->iov_len > sizeof(struct virtio_net_hdr)) { - /* Chop off the virtio header, leaving packet data. */ - iov->iov_len -= sizeof(struct virtio_net_hdr); - chain_len += iov->iov_len; - iov->iov_base = hvaddr_mem(desc->addr + - sizeof(struct virtio_net_hdr), iov->iov_len); - if (iov->iov_base == NULL) - goto reset; - iov_cnt++; } + /* Handel the first descriptor. */ + chain_len += iov->iov_len; + iov->iov_base = hvaddr_mem(desc->addr, iov->iov_len); + if (iov->iov_base == NULL) + goto reset; + iov_cnt++; + /* * Walk the chain and collect remaining addresses and lengths. */ @@ -818,6 +773,8 @@ vionet_tx(struct virtio_dev *dev) * for this inspection. 
*/ iov = &iov_tx[0]; + if (iov->iov_len == sizeof(struct virtio_net_hdr)) + iov = &iov_tx[1]; if (vionet->lockedmac) { if (iov->iov_len < ETHER_HDR_LEN) { log_warnx("%s: insufficient header data", @@ -839,6 +796,7 @@ vionet_tx(struct virtio_dev *dev) if (dhcpsz > 0) { log_debug("%s: detected dhcp request of %zu bytes", __func__, dhcpsz); + sz = chain_len; goto drop; } } @@ -859,20 +817,23 @@ drop: /* Facilitate DHCP reply injection, if needed. */ if (dhcpsz > 0) { - pkt.buf = dhcppkt; - pkt.len = dhcpsz; - sz = write(pipe_inject[WRITE], &pkt, sizeof(pkt)); - if (sz == -1 && errno != EAGAIN) { + struct virtio_net_hdr vhdr = { 0 }; + struct iovec iovec[2] = { + { .iov_base = &vhdr, .iov_len = sizeof(vhdr) }, + { .iov_base = dhcppkt, .iov_len = dhcpsz } + }; + + sz = writev(pipe_inject[WRITE], iovec, 2); + + if (sz == -1 && errno != EAGAIN) log_warn("%s: packet injection", __func__); - free(pkt.buf); - } else if (sz == -1 && errno == EAGAIN) { + else if (sz == -1 && errno == EAGAIN) log_debug("%s: dropping dhcp reply", __func__); - free(pkt.buf); - } else if (sz != sizeof(pkt)) { + else if (sz != (ssize_t)(sizeof(vhdr) + dhcpsz)) log_warnx("%s: failed packet injection", __func__); - free(pkt.buf); - } + + free(dhcppkt); log_debug("%s: injected dhcp reply with %ld bytes", __func__, sz); } Index: usr.sbin/vmd/virtio.c =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v diff -u -p -r1.115 virtio.c --- usr.sbin/vmd/virtio.c 10 Jul 2024 09:27:33 -0000 1.115 +++ usr.sbin/vmd/virtio.c 16 Aug 2024 12:00:46 -0000 @@ -60,6 +60,7 @@ SLIST_HEAD(virtio_dev_head, virtio_dev) #define MAXPHYS (64 * 1024) /* max raw I/O transfer size */ +#define VIRTIO_NET_F_CSUM (1<<0) #define VIRTIO_NET_F_MAC (1<<5) #define VMMCI_F_TIMESYNC (1<<0) @@ -584,6 +585,9 @@ virtio_init(struct vmd_vm *vm, int child /* MAC address has been assigned by the parent */ memcpy(&dev->vionet.mac, &vmc->vmc_macs[i], 6); dev->vionet.cfg.device_feature = 
VIRTIO_NET_F_MAC; + + /* enable checksum offloading */ + dev->vionet.cfg.device_feature |= VIRTIO_NET_F_CSUM; dev->vionet.lockedmac = vmc->vmc_ifflags[i] & VMIFF_LOCKED ? 1 : 0; Index: usr.sbin/vmd/virtio.h =================================================================== RCS file: /cvs/src/usr.sbin/vmd/virtio.h,v diff -u -p -r1.52 virtio.h --- usr.sbin/vmd/virtio.h 10 Jul 2024 09:27:33 -0000 1.52 +++ usr.sbin/vmd/virtio.h 16 Aug 2024 12:00:43 -0000 @@ -49,7 +49,7 @@ /* Virtio network device is backed by tap(4), so inherit limits */ #define VIONET_HARD_MTU TUNMRU -#define VIONET_MIN_TXLEN ETHER_HDR_LEN +#define VIONET_MIN_TXLEN (sizeof(struct virtio_net_hdr) + ETHER_HDR_LEN) #define VIONET_MAX_TXLEN VIONET_HARD_MTU + ETHER_HDR_LEN /* VMM Control Interface shutdown timeout (in seconds) */