
From: Stefan Fritsch <sf@openbsd.org>
Subject: vio: Enable multiqueue
To: tech@openbsd.org
Date: Tue, 7 Jan 2025 09:26:55 +0100

Hi,

this diff finally enables multiqueue for vio(4). It goes on top of the 
"virtio: Support unused virtqueues" diff from my previous mail.

The distribution of packets to the enabled queues is not optimal. To
improve this, one would need the optional RSS (receive-side scaling)
feature, which is difficult to configure with libvirt/qemu and therefore
usually not available on hypervisors. Things may improve with future
libvirt versions. RSS support is not included in this diff. But even
without RSS, we have seen some nice performance gains.
    
We use a single interrupt vector for each rx/tx queue pair. With the
config and control queue vectors, we need N+2 vectors for N queues. If
multiqueue is not available, the old scheme is used with either one
vector per virtqueue or one vector for all queues.
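
As an illustration of the intended layout (a sketch, not part of the
diff; vio_vq_intr_vec() is a hypothetical helper, the code in the diff
just assigns i + 2 inline):

/*
 * vec 0:     config change interrupt
 * vec 1:     control virtqueue
 * vec 2 + i: rx/tx virtqueue pair i, for 0 <= i < N
 */
static inline int
vio_vq_intr_vec(int qpair)
{
	return 2 + qpair;
}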
    
* virtio: Add API to establish interrupts on specific cpus in child
  drivers. Also make virtio_pci_setup_msix return proper errno.
    
* virtio_pci: Increase max number of MSIX vectors
    
* vio: Configure multiple queues and allocate proper interrupts.


I am not entirely happy with the API for establishing interrupts. But 
there are several variants of how interrupts need to be handled (virtio 
pci with or without MSIX, virtio mmio) and I want to keep those details 
away from the child drivers as much as possible. The way I have 
implemented it, only the child drivers that need to allocate per-cpu 
interrupts have to deal with the new API. If anyone has a better idea, I 
would be interested to hear it.
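
For reference, the child-driver side of the new hook ends up looking
roughly like this (condensed from the vio_attach() hunk further down;
error handling and the non-multiqueue fallback omitted):

	/* vec 0: config change, vec 1: control queue */
	virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, vsc);
	virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, sc->sc_ctl_vq);

	/* one vector per rx/tx pair, pinned to the cpu picked by intrmap */
	for (i = 0; i < sc->sc_nqueues; i++) {
		struct cpu_info *ci = intrmap_cpu(sc->sc_intrmap, i);

		virtio_intr_establish(vsc, va, i + 2, ci, vio_queue_intr,
		    &sc->sc_q[i]);
	}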

comments? ok?

Cheers,
Stefan

diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index da7f2c3bea3..3ca631dcca0 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -105,6 +105,8 @@ int		virtio_mmio_negotiate_features(struct virtio_softc *,
     const struct virtio_feature_name *);
 int		virtio_mmio_intr(void *);
 void		virtio_mmio_intr_barrier(struct virtio_softc *);
+int		virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *,
+    int, struct cpu_info *, int (*)(void *), void *);
 
 struct virtio_mmio_softc {
 	struct virtio_softc	sc_sc;
@@ -160,6 +162,7 @@ const struct virtio_ops virtio_mmio_ops = {
 	virtio_mmio_attach_finish,
 	virtio_mmio_intr,
 	virtio_mmio_intr_barrier,
+	virtio_mmio_intr_establish,
 };
 
 uint16_t
@@ -546,3 +549,11 @@ virtio_mmio_intr_barrier(struct virtio_softc *vsc)
 	if (sc->sc_ih)
 		intr_barrier(sc->sc_ih);
 }
+
+int
+virtio_mmio_intr_establish(struct virtio_softc *vsc,
+    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
+    int (*func)(void *), void *arg)
+{
+	return ENXIO;
+}
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 8463f6223de..5f91d0ebe77 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -50,7 +50,7 @@
  * XXX: PCI-endian while the device specific registers are native endian.
  */
 
-#define MAX_MSIX_VECS	8
+#define MAX_MSIX_VECS	16
 
 struct virtio_pci_softc;
 struct virtio_pci_attach_args;
@@ -62,7 +62,7 @@ int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p
 int		virtio_pci_detach(struct device *, int);
 
 void		virtio_pci_kick(struct virtio_softc *, uint16_t);
-int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
+int		virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset);
 uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
 uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
 uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
@@ -81,9 +81,10 @@ int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe
 int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
 void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
 void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
-int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *);
+int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
 int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int);
 void		virtio_pci_intr_barrier(struct virtio_softc *);
+int		virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
 void		virtio_pci_free_irqs(struct virtio_pci_softc *);
 int		virtio_pci_poll_intr(void *);
 int		virtio_pci_legacy_intr(void *);
@@ -100,6 +101,7 @@ enum irq_type {
 	IRQ_NO_MSIX,
 	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
 	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
+	IRQ_MSIX_CHILD,  /* assigned by child driver */
 };
 
 struct virtio_pci_intr {
@@ -179,6 +181,7 @@ const struct virtio_ops virtio_pci_ops = {
 	virtio_pci_attach_finish,
 	virtio_pci_poll_intr,
 	virtio_pci_intr_barrier,
+	virtio_pci_intr_establish,
 };
 
 static inline uint64_t
@@ -648,10 +651,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux)
 		goto free;
 	}
 
-	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
 	sc->sc_irq_type = IRQ_NO_MSIX;
-	if (virtio_pci_adjust_config_region(sc) != 0)
-		goto err;
+	if (virtio_pci_adjust_config_region(sc,
+	    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0)
+	{
+		goto free;
+	}
 
 	virtio_device_reset(vsc);
 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
@@ -692,7 +697,9 @@ virtio_pci_attach_finish(struct virtio_softc *vsc,
 	pci_chipset_tag_t pc = vpa->vpa_pa->pa_pc;
 	char const *intrstr;
 
-	if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
+	if (sc->sc_irq_type == IRQ_MSIX_CHILD) {
+		intrstr = "msix";
+	} else if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
 		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
 		intrstr = "msix per-VQ";
 	} else if (virtio_pci_setup_msix(sc, vpa, 1) == 0) {
@@ -754,11 +761,14 @@ virtio_pci_detach(struct device *self, int flags)
 }
 
 int
-virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
+virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset)
 {
 	if (sc->sc_sc.sc_version_1)
 		return 0;
-	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
+	if (sc->sc_devcfg_offset == offset)
+		return 0;
+	sc->sc_devcfg_offset = offset;
+	sc->sc_devcfg_iosize = sc->sc_iosize - offset;
 	sc->sc_devcfg_iot = sc->sc_iot;
 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
 	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
@@ -958,30 +968,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc,
 
 int
 virtio_pci_msix_establish(struct virtio_pci_softc *sc,
-    struct virtio_pci_attach_args *vpa, int idx,
+    struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci,
     int (*handler)(void *), void *ih_arg)
 {
 	struct virtio_softc *vsc = &sc->sc_sc;
 	pci_intr_handle_t ih;
+	int r;
 
 	KASSERT(idx < sc->sc_nintr);
 
-	if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) {
+	r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih);
+	if (r != 0) {
 #if VIRTIO_DEBUG
 		printf("%s[%d]: pci_intr_map_msix failed\n",
 		    vsc->sc_dev.dv_xname, idx);
 #endif
-		return 1;
+		return r;
 	}
 	snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d",
 	    vsc->sc_child->dv_xname, idx);
-	sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
-	    handler, ih_arg, sc->sc_intr[idx].name);
+	sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl,
+	    ci, handler, ih_arg, sc->sc_intr[idx].name);
 	if (sc->sc_intr[idx].ih == NULL) {
 		printf("%s[%d]: couldn't establish msix interrupt\n",
-		    vsc->sc_dev.dv_xname, idx);
-		return 1;
+		    vsc->sc_child->dv_xname, idx);
+		return ENOMEM;
 	}
+	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI);
 	return 0;
 }
 
@@ -1031,8 +1044,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc)
 		}
 	}
 
-	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
-	virtio_pci_adjust_config_region(sc);
+	/* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? */
+	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI);
 }
 
 int
@@ -1040,34 +1053,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
     struct virtio_pci_attach_args *vpa, int shared)
 {
 	struct virtio_softc *vsc = &sc->sc_sc;
-	int i;
+	int i, r = 0;
 
 	/* Shared needs config + queue */
 	if (shared && vpa->vpa_va.va_nintr < 1 + 1)
-		return 1;
+		return ERANGE;
 	/* Per VQ needs config + N * queue */
 	if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs)
-		return 1;
+		return ERANGE;
 
-	if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc))
-		return 1;
-	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
-	virtio_pci_adjust_config_region(sc);
+	r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc);
+	if (r != 0)
+		return r;
 
 	if (shared) {
-		if (virtio_pci_msix_establish(sc, vpa, 1,
-		    virtio_pci_shared_queue_intr, vsc)) {
+		r = virtio_pci_msix_establish(sc, vpa, 1, NULL,
+		    virtio_pci_shared_queue_intr, vsc);
+		if (r != 0)
 			goto fail;
-		}
 
 		for (i = 0; i < vsc->sc_nvqs; i++)
 			vsc->sc_vqs[i].vq_intr_vec = 1;
 	} else {
 		for (i = 0; i < vsc->sc_nvqs; i++) {
-			if (virtio_pci_msix_establish(sc, vpa, i + 1,
-			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
+			r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL,
+			    virtio_pci_queue_intr, &vsc->sc_vqs[i]);
+			if (r != 0)
 				goto fail;
-			}
 			vsc->sc_vqs[i].vq_intr_vec = i + 1;
 		}
 	}
@@ -1075,7 +1087,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
 	return 0;
 fail:
 	virtio_pci_free_irqs(sc);
-	return 1;
+	return r;
+}
+
+int
+virtio_pci_intr_establish(struct virtio_softc *vsc,
+    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
+    int (*func)(void *), void *arg)
+{
+	struct virtio_pci_attach_args *vpa;
+	struct virtio_pci_softc *sc;
+
+	if (vsc->sc_ops != &virtio_pci_ops)
+		return ENXIO;
+
+	vpa = (struct virtio_pci_attach_args *)va;
+	sc = (struct virtio_pci_softc *)vsc;
+
+	if (vec >= sc->sc_nintr || sc->sc_nintr <= 1)
+		return ERANGE;
+
+	sc->sc_irq_type = IRQ_MSIX_CHILD;
+	return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg);
 }
 
 void
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index a728940e314..20d1bfc1ca8 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -32,8 +32,10 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/device.h>
+#include <sys/intrmap.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
+#include <sys/percpu.h>	/* for CACHELINESIZE */
 #include <sys/sockio.h>
 #include <sys/timeout.h>
 
@@ -64,8 +66,15 @@
  * if_vioreg.h:
  */
 /* Configuration registers */
-#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
-#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
+#define VIRTIO_NET_CONFIG_MAC		 0 /*  8 bit x 6 byte */
+#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16 bit */
+#define VIRTIO_NET_CONFIG_MAX_QUEUES	 8 /* 16 bit */
+#define VIRTIO_NET_CONFIG_MTU		10 /* 16 bit */
+#define VIRTIO_NET_CONFIG_SPEED		12 /* 32 bit */
+#define VIRTIO_NET_CONFIG_DUPLEX	16 /*  8 bit */
+#define VIRTIO_NET_CONFIG_RSS_SIZE	17 /*  8 bit */
+#define VIRTIO_NET_CONFIG_RSS_LEN	18 /* 16 bit */
+#define VIRTIO_NET_CONFIG_HASH_TYPES	20 /* 16 bit */
 
 /* Feature bits */
 #define VIRTIO_NET_F_CSUM			(1ULL<<0)
@@ -183,6 +192,11 @@ struct virtio_net_ctrl_cmd {
 # define VIRTIO_NET_CTRL_VLAN_ADD	0
 # define VIRTIO_NET_CTRL_VLAN_DEL	1
 
+#define VIRTIO_NET_CTRL_MQ		4
+# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
+# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG		1
+# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG		2
+
 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS	5
 # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0
 
@@ -196,6 +210,12 @@ struct virtio_net_ctrl_rx {
 	uint8_t	onoff;
 } __packed;
 
+struct virtio_net_ctrl_mq_pairs_set {
+	uint16_t virtqueue_pairs;
+};
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
+
 struct virtio_net_ctrl_guest_offloads {
 	uint64_t offloads;
 } __packed;
@@ -231,7 +251,7 @@ struct vio_queue {
 	struct virtqueue	 *viq_txvq;
 	struct mutex		  viq_txmtx, viq_rxmtx;
 	int			  viq_txfree_slots;
-};
+} __aligned(CACHELINESIZE);
 
 struct vio_softc {
 	struct device		sc_dev;
@@ -251,14 +271,16 @@ struct vio_softc {
 	caddr_t			sc_dma_kva;
 
 	int			sc_hdr_size;
-	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
-	struct virtio_net_ctrl_status *sc_ctrl_status;
-	struct virtio_net_ctrl_rx *sc_ctrl_rx;
-	struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
-	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
+	struct virtio_net_ctrl_cmd		*sc_ctrl_cmd;
+	struct virtio_net_ctrl_status		*sc_ctrl_status;
+	struct virtio_net_ctrl_rx		*sc_ctrl_rx;
+	struct virtio_net_ctrl_mq_pairs_set	*sc_ctrl_mq_pairs;
+	struct virtio_net_ctrl_guest_offloads	*sc_ctrl_guest_offloads;
+	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_uc;
 #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
-	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
+	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_mc;
 
+	struct intrmap		*sc_intrmap;
 	struct vio_queue	*sc_q;
 	uint16_t		sc_nqueues;
 	int			sc_tx_slots_per_req;
@@ -317,10 +339,15 @@ void	vio_tx_drain(struct vio_softc *);
 int	vio_encap(struct vio_queue *, int, struct mbuf *);
 void	vio_txtick(void *);
 
+int	vio_queue_intr(void *);
+int	vio_config_intr(void *);
+int	vio_ctrl_intr(void *);
+
 /* other control */
 void	vio_link_state(struct ifnet *);
 int	vio_config_change(struct virtio_softc *);
 int	vio_ctrl_rx(struct vio_softc *, int, int);
+int	vio_ctrl_mq(struct vio_softc *);
 int	vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
 int	vio_set_rx_filter(struct vio_softc *);
 void	vio_iff(struct vio_softc *);
@@ -408,6 +435,8 @@ vio_free_dmamem(struct vio_softc *sc)
  *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
  *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
  *			 (WRITE)
+ *   sc_ctrl_mq_pairs:	 set number of rx/tx queue pairs (WRITE)
+ *   sc_ctrl_guest_offloads: configure offload features (WRITE)
  *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
  *			 class command (WRITE)
  *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
@@ -449,6 +478,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
 		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
 		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
 		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
+		allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1;
 		allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
 		allocsize += VIO_CTRL_MAC_INFO_SIZE;
 	}
@@ -474,6 +504,8 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
 		offset += sizeof(*sc->sc_ctrl_status);
 		sc->sc_ctrl_rx = (void *)(kva + offset);
 		offset += sizeof(*sc->sc_ctrl_rx);
+		sc->sc_ctrl_mq_pairs = (void *)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_mq_pairs);
 		sc->sc_ctrl_guest_offloads = (void *)(kva + offset);
 		offset += sizeof(*sc->sc_ctrl_guest_offloads);
 		sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset);
@@ -598,7 +630,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 	struct vio_softc *sc = (struct vio_softc *)self;
 	struct virtio_softc *vsc = (struct virtio_softc *)parent;
 	struct virtio_attach_args *va = aux;
-	int i, tx_max_segments;
+	int i, r, tx_max_segments;
 	struct ifnet *ifp = &sc->sc_ac.ac_if;
 
 	if (vsc->sc_child != NULL) {
@@ -616,6 +648,9 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
 	    VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM;
 
+	if (va->va_nintr > 3 && ncpus > 1)
+		vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
+
 	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
 	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
 
@@ -626,10 +661,23 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 	if (virtio_negotiate_features(vsc, virtio_net_feature_names) != 0)
 		goto err;
 
-	sc->sc_nqueues = 1;
-	vsc->sc_nvqs = 2 * sc->sc_nqueues;
-	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
-		vsc->sc_nvqs++;
+	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
+		i = virtio_read_device_config_2(vsc,
+		    VIRTIO_NET_CONFIG_MAX_QUEUES);
+		vsc->sc_nvqs = 2 * i + 1;
+		i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
+		sc->sc_intrmap = intrmap_create(&sc->sc_dev, i,
+		    va->va_nintr - 2, 0);
+		sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
+		printf(": %u queue%s", sc->sc_nqueues,
+		    sc->sc_nqueues > 1 ? "s"  : "");
+	} else {
+		sc->sc_nqueues = 1;
+		printf(": 1 queue");
+		vsc->sc_nvqs = 2;
+		if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
+			vsc->sc_nvqs++;
+	}
 
 	vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF,
 	    M_WAITOK|M_ZERO);
@@ -729,18 +777,66 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 		else
 			virtio_stop_vq_intr(vsc, vioq->viq_txvq);
 		vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1;
+		KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req);
+		if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) {
+			printf("unequal tx queue size %d: %d != %d\n", i,
+			    vioq->viq_txvq->vq_num,
+			    sc->sc_q[0].viq_txvq->vq_num);
+			goto err;
+		}
+		DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq,
+		    vioq->viq_txvq);
+
+		if (sc->sc_intrmap != NULL) {
+			vioq->viq_rxvq->vq_intr_vec = i + 2;
+			vioq->viq_txvq->vq_intr_vec = i + 2;
+		}
 	}
 
 	/* control queue */
 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) {
-		sc->sc_ctl_vq = &vsc->sc_vqs[2];
-		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1,
-		    "control") != 0)
+		i = 2;
+		if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
+			i = 2 * virtio_read_device_config_2(vsc,
+			    VIRTIO_NET_CONFIG_MAX_QUEUES);
+		}
+		sc->sc_ctl_vq =  &vsc->sc_vqs[i];
+		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0)
 			goto err;
 		sc->sc_ctl_vq->vq_done = vio_ctrleof;
+		if (sc->sc_intrmap != NULL)
+			sc->sc_ctl_vq->vq_intr_vec = 1;
 		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
 	}
 
+	if (sc->sc_intrmap) {
+		r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr,
+		    vsc);
+		if (r != 0) {
+			printf("%s: cannot alloc config intr: %d\n",
+			    sc->sc_dev.dv_xname, r);
+			goto err;
+		}
+		r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr,
+		    sc->sc_ctl_vq);
+		if (r != 0) {
+			printf("%s: cannot alloc ctrl intr: %d\n",
+			    sc->sc_dev.dv_xname, r);
+			goto err;
+		}
+		for (i = 0; i < sc->sc_nqueues; i++) {
+			struct cpu_info *ci = NULL;
+			ci = intrmap_cpu(sc->sc_intrmap, i);
+			r = virtio_intr_establish(vsc, va, i + 2, ci,
+			    vio_queue_intr, &sc->sc_q[i]);
+			if (r != 0) {
+				printf("%s: cannot alloc q%d intr: %d\n",
+				    sc->sc_dev.dv_xname, i, r);
+				goto err;
+			}
+		}
+	}
+
 	if (vio_alloc_mem(sc, tx_max_segments) < 0)
 		goto err;
 
@@ -760,6 +856,11 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 	if (virtio_attach_finish(vsc, va) != 0)
 		goto err;
 
+	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
+		/* ctrl queue works only after DRIVER_OK */
+		vio_ctrl_mq(sc);
+	}
+
 	if_attach(ifp);
 	ether_ifattach(ifp);
 	vio_link_state(ifp);
@@ -805,6 +906,33 @@ vio_link_state(struct ifnet *ifp)
 	}
 }
 
+/* interrupt handlers for multi-queue */
+int
+vio_queue_intr(void *arg)
+{
+	struct vio_queue *vioq = arg;
+	struct virtio_softc *vsc = vioq->viq_sc->sc_virtio;
+	int r;
+	r = virtio_check_vq(vsc, vioq->viq_txvq);
+	r |= virtio_check_vq(vsc, vioq->viq_rxvq);
+	return r;
+}
+
+int
+vio_config_intr(void *arg)
+{
+	struct virtio_softc *vsc = arg;
+	return vio_config_change(vsc);
+}
+
+int
+vio_ctrl_intr(void *arg)
+{
+	struct virtqueue *vq = arg;
+	return virtio_check_vq(vq->vq_owner, vq);
+}
+
+
 int
 vio_config_change(struct virtio_softc *vsc)
 {
@@ -913,6 +1041,8 @@ vio_stop(struct ifnet *ifp, int disable)
 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
 		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
 	virtio_reinit_end(vsc);
+	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ))
+		vio_ctrl_mq(sc);
 	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
 		vio_ctrl_wakeup(sc, FREE);
 }
@@ -1137,6 +1267,33 @@ vio_dump(struct vio_softc *sc)
 }
 #endif
 
+static int
+vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri)
+{
+	struct if_rxring_info *ifrs, *ifr;
+	int error;
+	unsigned int i;
+
+	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
+	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
+	if (ifrs == NULL)
+		return (ENOMEM);
+
+	for (i = 0; i < sc->sc_nqueues; i++) {
+		ifr = &ifrs[i];
+
+		ifr->ifr_size = sc->sc_rx_mbuf_size;
+		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
+		ifr->ifr_info = sc->sc_q[i].viq_rxring;
+	}
+
+	error = if_rxr_info_ioctl(ifri, i, ifrs);
+
+	free(ifrs, M_TEMP, i * sizeof(*ifrs));
+
+	return (error);
+}
+
 int
 vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
@@ -1171,8 +1328,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 		break;
 	case SIOCGIFRXR:
-		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
-		    NULL, sc->sc_rx_mbuf_size, &sc->sc_q[0].viq_rxring);
+		r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
 		break;
 	default:
 		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
@@ -1666,6 +1822,8 @@ vio_ctrl_submit(struct vio_softc *sc, int slot)
 			vio_ctrl_wakeup(sc, RESET);
 			return ENXIO;
 		}
+		if (cold)
+			virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq);
 	}
 
 	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
@@ -1723,6 +1881,41 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
 	return r;
 }
 
+/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */
+int
+vio_ctrl_mq(struct vio_softc *sc)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct virtqueue *vq = sc->sc_ctl_vq;
+	int r, slot;
+
+
+	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ,
+	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot);
+	if (r != 0)
+		return r;
+
+	sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues;
+
+	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs,
+	    sizeof(*sc->sc_ctrl_mq_pairs), 1);
+
+	r = vio_ctrl_submit(sc, slot);
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs,
+	    sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE);
+
+	if (r != 0)
+		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname,
+		    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET);
+
+	DPRINTF("%s: cmd %d %d: %d\n", __func__,
+	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r);
+
+	vio_ctrl_finish(sc);
+	return r;
+}
+
 int
 vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
 {
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index 207e43ce9b8..a0727e76ee8 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -165,6 +165,8 @@ struct virtio_ops {
 	int		(*attach_finish)(struct virtio_softc *, struct virtio_attach_args *);
 	int		(*poll_intr)(void *);
 	void		(*intr_barrier)(struct virtio_softc *);
+	int		(*intr_establish)(struct virtio_softc *, struct virtio_attach_args *,
+			    int, struct cpu_info *, int (*)(void *), void *);
 };
 
 #define VIRTIO_CHILD_ERROR	((void*)1)
@@ -208,6 +210,14 @@ struct virtio_softc {
 #define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
 #define	virtio_intr_barrier(sc)			(sc)->sc_ops->intr_barrier(sc)
 
+/*
+ * virtio_intr_establish() only works if va_nintr > 1. If it is called by a
+ * child driver, the transport driver will skip automatic intr allocation and
+ * the child driver must allocate all required interrupts itself. Vector 0 is
+ * always used for the config change interrupt.
+ */
+#define	virtio_intr_establish(sc, va, v, ci, fn, a)	(sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a)
+
 /* only for transport drivers */
 #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)