Download raw body.
On Tue, Jan 07, 2025 at 09:26:55AM +0100, Stefan Fritsch wrote:
> Hi,
>
> this diff finally enables multiqueue for vio(4). It goes on top of the
> "virtio: Support unused virtqueues" diff from my previous mail.
>
> The distribution of packets to the enabled queues is not optimal. To
> improve this, one would need the optional RSS (receive-side scaling)
> feature which is difficult to configure with libvirt/qemu and therefore
> usually not available on hypervisors. Things may improve with future
> libvirt versions. RSS support is not included in this diff. But even
> without RSS, we have seen some nice performance gains.
>
> We use a single interrupt vector for every rx/tx queue pair. With config
> and control queue vectors, we need N+2 vectors for N queues. If
> multi-queue is not available, the old scheme is used with either one
> vector per virtqueue or one vector for all queues.
>
> * virtio: Add API to establish interrupts on specific cpus in child
> drivers. Also make virtio_pci_setup_msix return proper errno.
>
> * virtio_pci: Increase max number of MSIX vectors
>
> * vio: Configure multiple queues and allocate proper interrupts.
>
>
> I am not entirely happy with the API for establishing interrupts. But
> there are several variants how interrupts need to be handled (virtio pci
> with or without MSIX, virtio mmio) and I want to keep those details from
> the child drivers as far as possible. The way I have implemented it, only
> the child drivers that need to allocate per-cpu interrupts need to deal
> with the new API. If anyone has a better idea, I would be interested to
> hear it.
>
> comments? ok?
I have tested it on KVM, on vmd, and with SEV bounce buffers.
With parallel send and receive of UDP packets it seems to be faster.
But there is a lot of variation in the tests. TCP has its bottleneck
somewhere else, so there is no significant difference.
OK bluhm@
> diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
> index da7f2c3bea3..3ca631dcca0 100644
> --- a/sys/dev/fdt/virtio_mmio.c
> +++ b/sys/dev/fdt/virtio_mmio.c
> @@ -105,6 +105,8 @@ int virtio_mmio_negotiate_features(struct virtio_softc *,
> const struct virtio_feature_name *);
> int virtio_mmio_intr(void *);
> void virtio_mmio_intr_barrier(struct virtio_softc *);
> +int virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *,
> + int, struct cpu_info *, int (*)(void *), void *);
>
> struct virtio_mmio_softc {
> struct virtio_softc sc_sc;
> @@ -160,6 +162,7 @@ const struct virtio_ops virtio_mmio_ops = {
> virtio_mmio_attach_finish,
> virtio_mmio_intr,
> virtio_mmio_intr_barrier,
> + virtio_mmio_intr_establish,
> };
>
> uint16_t
> @@ -546,3 +549,11 @@ virtio_mmio_intr_barrier(struct virtio_softc *vsc)
> if (sc->sc_ih)
> intr_barrier(sc->sc_ih);
> }
> +
> +int
> +virtio_mmio_intr_establish(struct virtio_softc *vsc,
> + struct virtio_attach_args *va, int vec, struct cpu_info *ci,
> + int (*func)(void *), void *arg)
> +{
> + return ENXIO;
> +}
> diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
> index 8463f6223de..5f91d0ebe77 100644
> --- a/sys/dev/pci/virtio_pci.c
> +++ b/sys/dev/pci/virtio_pci.c
> @@ -50,7 +50,7 @@
> * XXX: PCI-endian while the device specific registers are native endian.
> */
>
> -#define MAX_MSIX_VECS 8
> +#define MAX_MSIX_VECS 16
>
> struct virtio_pci_softc;
> struct virtio_pci_attach_args;
> @@ -62,7 +62,7 @@ int virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p
> int virtio_pci_detach(struct device *, int);
>
> void virtio_pci_kick(struct virtio_softc *, uint16_t);
> -int virtio_pci_adjust_config_region(struct virtio_pci_softc *);
> +int virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset);
> uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int);
> uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int);
> uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int);
> @@ -81,9 +81,10 @@ int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe
> int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
> void virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
> void virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
> -int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *);
> +int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
> int virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int);
> void virtio_pci_intr_barrier(struct virtio_softc *);
> +int virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
> void virtio_pci_free_irqs(struct virtio_pci_softc *);
> int virtio_pci_poll_intr(void *);
> int virtio_pci_legacy_intr(void *);
> @@ -100,6 +101,7 @@ enum irq_type {
> IRQ_NO_MSIX,
> IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
> IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
> + IRQ_MSIX_CHILD, /* assigned by child driver */
> };
>
> struct virtio_pci_intr {
> @@ -179,6 +181,7 @@ const struct virtio_ops virtio_pci_ops = {
> virtio_pci_attach_finish,
> virtio_pci_poll_intr,
> virtio_pci_intr_barrier,
> + virtio_pci_intr_establish,
> };
>
> static inline uint64_t
> @@ -648,10 +651,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux)
> goto free;
> }
>
> - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
> sc->sc_irq_type = IRQ_NO_MSIX;
> - if (virtio_pci_adjust_config_region(sc) != 0)
> - goto err;
> + if (virtio_pci_adjust_config_region(sc,
> + VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0)
> + {
> + goto free;
> + }
>
> virtio_device_reset(vsc);
> virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
> @@ -692,7 +697,9 @@ virtio_pci_attach_finish(struct virtio_softc *vsc,
> pci_chipset_tag_t pc = vpa->vpa_pa->pa_pc;
> char const *intrstr;
>
> - if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
> + if (sc->sc_irq_type == IRQ_MSIX_CHILD) {
> + intrstr = "msix";
> + } else if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
> sc->sc_irq_type = IRQ_MSIX_PER_VQ;
> intrstr = "msix per-VQ";
> } else if (virtio_pci_setup_msix(sc, vpa, 1) == 0) {
> @@ -754,11 +761,14 @@ virtio_pci_detach(struct device *self, int flags)
> }
>
> int
> -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
> +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset)
> {
> if (sc->sc_sc.sc_version_1)
> return 0;
> - sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
> + if (sc->sc_devcfg_offset == offset)
> + return 0;
> + sc->sc_devcfg_offset = offset;
> + sc->sc_devcfg_iosize = sc->sc_iosize - offset;
> sc->sc_devcfg_iot = sc->sc_iot;
> if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
> sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
> @@ -958,30 +968,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc,
>
> int
> virtio_pci_msix_establish(struct virtio_pci_softc *sc,
> - struct virtio_pci_attach_args *vpa, int idx,
> + struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci,
> int (*handler)(void *), void *ih_arg)
> {
> struct virtio_softc *vsc = &sc->sc_sc;
> pci_intr_handle_t ih;
> + int r;
>
> KASSERT(idx < sc->sc_nintr);
>
> - if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) {
> + r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih);
> + if (r != 0) {
> #if VIRTIO_DEBUG
> printf("%s[%d]: pci_intr_map_msix failed\n",
> vsc->sc_dev.dv_xname, idx);
> #endif
> - return 1;
> + return r;
> }
> snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d",
> vsc->sc_child->dv_xname, idx);
> - sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
> - handler, ih_arg, sc->sc_intr[idx].name);
> + sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl,
> + ci, handler, ih_arg, sc->sc_intr[idx].name);
> if (sc->sc_intr[idx].ih == NULL) {
> printf("%s[%d]: couldn't establish msix interrupt\n",
> - vsc->sc_dev.dv_xname, idx);
> - return 1;
> + vsc->sc_child->dv_xname, idx);
> + return ENOMEM;
> }
> + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI);
> return 0;
> }
>
> @@ -1031,8 +1044,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc)
> }
> }
>
> - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
> - virtio_pci_adjust_config_region(sc);
> + /* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? */
> + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI);
> }
>
> int
> @@ -1040,34 +1053,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
> struct virtio_pci_attach_args *vpa, int shared)
> {
> struct virtio_softc *vsc = &sc->sc_sc;
> - int i;
> + int i, r = 0;
>
> /* Shared needs config + queue */
> if (shared && vpa->vpa_va.va_nintr < 1 + 1)
> - return 1;
> + return ERANGE;
> /* Per VQ needs config + N * queue */
> if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs)
> - return 1;
> + return ERANGE;
>
> - if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc))
> - return 1;
> - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
> - virtio_pci_adjust_config_region(sc);
> + r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc);
> + if (r != 0)
> + return r;
>
> if (shared) {
> - if (virtio_pci_msix_establish(sc, vpa, 1,
> - virtio_pci_shared_queue_intr, vsc)) {
> + r = virtio_pci_msix_establish(sc, vpa, 1, NULL,
> + virtio_pci_shared_queue_intr, vsc);
> + if (r != 0)
> goto fail;
> - }
>
> for (i = 0; i < vsc->sc_nvqs; i++)
> vsc->sc_vqs[i].vq_intr_vec = 1;
> } else {
> for (i = 0; i < vsc->sc_nvqs; i++) {
> - if (virtio_pci_msix_establish(sc, vpa, i + 1,
> - virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
> + r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL,
> + virtio_pci_queue_intr, &vsc->sc_vqs[i]);
> + if (r != 0)
> goto fail;
> - }
> vsc->sc_vqs[i].vq_intr_vec = i + 1;
> }
> }
> @@ -1075,7 +1087,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
> return 0;
> fail:
> virtio_pci_free_irqs(sc);
> - return 1;
> + return r;
> +}
> +
> +int
> +virtio_pci_intr_establish(struct virtio_softc *vsc,
> + struct virtio_attach_args *va, int vec, struct cpu_info *ci,
> + int (*func)(void *), void *arg)
> +{
> + struct virtio_pci_attach_args *vpa;
> + struct virtio_pci_softc *sc;
> +
> + if (vsc->sc_ops != &virtio_pci_ops)
> + return ENXIO;
> +
> + vpa = (struct virtio_pci_attach_args *)va;
> + sc = (struct virtio_pci_softc *)vsc;
> +
> + if (vec >= sc->sc_nintr || sc->sc_nintr <= 1)
> + return ERANGE;
> +
> + sc->sc_irq_type = IRQ_MSIX_CHILD;
> + return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg);
> }
>
> void
> diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> index a728940e314..20d1bfc1ca8 100644
> --- a/sys/dev/pv/if_vio.c
> +++ b/sys/dev/pv/if_vio.c
> @@ -32,8 +32,10 @@
> #include <sys/param.h>
> #include <sys/systm.h>
> #include <sys/device.h>
> +#include <sys/intrmap.h>
> #include <sys/mbuf.h>
> #include <sys/mutex.h>
> +#include <sys/percpu.h> /* for CACHELINESIZE */
> #include <sys/sockio.h>
> #include <sys/timeout.h>
>
> @@ -64,8 +66,15 @@
> * if_vioreg.h:
> */
> /* Configuration registers */
> -#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
> -#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
> +#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 byte */
> +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16 bit */
> +#define VIRTIO_NET_CONFIG_MAX_QUEUES 8 /* 16 bit */
> +#define VIRTIO_NET_CONFIG_MTU 10 /* 16 bit */
> +#define VIRTIO_NET_CONFIG_SPEED 12 /* 32 bit */
> +#define VIRTIO_NET_CONFIG_DUPLEX 16 /* 8 bit */
> +#define VIRTIO_NET_CONFIG_RSS_SIZE 17 /* 8 bit */
> +#define VIRTIO_NET_CONFIG_RSS_LEN 18 /* 16 bit */
> +#define VIRTIO_NET_CONFIG_HASH_TYPES 20 /* 16 bit */
>
> /* Feature bits */
> #define VIRTIO_NET_F_CSUM (1ULL<<0)
> @@ -183,6 +192,11 @@ struct virtio_net_ctrl_cmd {
> # define VIRTIO_NET_CTRL_VLAN_ADD 0
> # define VIRTIO_NET_CTRL_VLAN_DEL 1
>
> +#define VIRTIO_NET_CTRL_MQ 4
> +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
> +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1
> +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2
> +
> #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5
> # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0
>
> @@ -196,6 +210,12 @@ struct virtio_net_ctrl_rx {
> uint8_t onoff;
> } __packed;
>
> +struct virtio_net_ctrl_mq_pairs_set {
> + uint16_t virtqueue_pairs;
> +};
> +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
> +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
> +
> struct virtio_net_ctrl_guest_offloads {
> uint64_t offloads;
> } __packed;
> @@ -231,7 +251,7 @@ struct vio_queue {
> struct virtqueue *viq_txvq;
> struct mutex viq_txmtx, viq_rxmtx;
> int viq_txfree_slots;
> -};
> +} __aligned(CACHELINESIZE);
>
> struct vio_softc {
> struct device sc_dev;
> @@ -251,14 +271,16 @@ struct vio_softc {
> caddr_t sc_dma_kva;
>
> int sc_hdr_size;
> - struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
> - struct virtio_net_ctrl_status *sc_ctrl_status;
> - struct virtio_net_ctrl_rx *sc_ctrl_rx;
> - struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
> - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
> + struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
> + struct virtio_net_ctrl_status *sc_ctrl_status;
> + struct virtio_net_ctrl_rx *sc_ctrl_rx;
> + struct virtio_net_ctrl_mq_pairs_set *sc_ctrl_mq_pairs;
> + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
> + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
> #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
> - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
> + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
>
> + struct intrmap *sc_intrmap;
> struct vio_queue *sc_q;
> uint16_t sc_nqueues;
> int sc_tx_slots_per_req;
> @@ -317,10 +339,15 @@ void vio_tx_drain(struct vio_softc *);
> int vio_encap(struct vio_queue *, int, struct mbuf *);
> void vio_txtick(void *);
>
> +int vio_queue_intr(void *);
> +int vio_config_intr(void *);
> +int vio_ctrl_intr(void *);
> +
> /* other control */
> void vio_link_state(struct ifnet *);
> int vio_config_change(struct virtio_softc *);
> int vio_ctrl_rx(struct vio_softc *, int, int);
> +int vio_ctrl_mq(struct vio_softc *);
> int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
> int vio_set_rx_filter(struct vio_softc *);
> void vio_iff(struct vio_softc *);
> @@ -408,6 +435,8 @@ vio_free_dmamem(struct vio_softc *sc)
> * sc_ctrl_status: return value for a command via ctrl vq (READ)
> * sc_ctrl_rx: parameter for a VIRTIO_NET_CTRL_RX class command
> * (WRITE)
> + * sc_ctrl_mq_pairs: set number of rx/tx queue pairs (WRITE)
> + * sc_ctrl_guest_offloads: configure offload features (WRITE)
> * sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
> * class command (WRITE)
> * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
> @@ -449,6 +478,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
> allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
> allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
> + allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1;
> allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
> allocsize += VIO_CTRL_MAC_INFO_SIZE;
> }
> @@ -474,6 +504,8 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> offset += sizeof(*sc->sc_ctrl_status);
> sc->sc_ctrl_rx = (void *)(kva + offset);
> offset += sizeof(*sc->sc_ctrl_rx);
> + sc->sc_ctrl_mq_pairs = (void *)(kva + offset);
> + offset += sizeof(*sc->sc_ctrl_mq_pairs);
> sc->sc_ctrl_guest_offloads = (void *)(kva + offset);
> offset += sizeof(*sc->sc_ctrl_guest_offloads);
> sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset);
> @@ -598,7 +630,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
> struct vio_softc *sc = (struct vio_softc *)self;
> struct virtio_softc *vsc = (struct virtio_softc *)parent;
> struct virtio_attach_args *va = aux;
> - int i, tx_max_segments;
> + int i, r, tx_max_segments;
> struct ifnet *ifp = &sc->sc_ac.ac_if;
>
> if (vsc->sc_child != NULL) {
> @@ -616,6 +648,9 @@ vio_attach(struct device *parent, struct device *self, void *aux)
> VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
> VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM;
>
> + if (va->va_nintr > 3 && ncpus > 1)
> + vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
> +
> vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
> vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
>
> @@ -626,10 +661,23 @@ vio_attach(struct device *parent, struct device *self, void *aux)
> if (virtio_negotiate_features(vsc, virtio_net_feature_names) != 0)
> goto err;
>
> - sc->sc_nqueues = 1;
> - vsc->sc_nvqs = 2 * sc->sc_nqueues;
> - if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
> - vsc->sc_nvqs++;
> + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
> + i = virtio_read_device_config_2(vsc,
> + VIRTIO_NET_CONFIG_MAX_QUEUES);
> + vsc->sc_nvqs = 2 * i + 1;
> + i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
> + sc->sc_intrmap = intrmap_create(&sc->sc_dev, i,
> + va->va_nintr - 2, 0);
> + sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
> + printf(": %u queue%s", sc->sc_nqueues,
> + sc->sc_nqueues > 1 ? "s" : "");
> + } else {
> + sc->sc_nqueues = 1;
> + printf(": 1 queue");
> + vsc->sc_nvqs = 2;
> + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
> + vsc->sc_nvqs++;
> + }
>
> vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF,
> M_WAITOK|M_ZERO);
> @@ -729,18 +777,66 @@ vio_attach(struct device *parent, struct device *self, void *aux)
> else
> virtio_stop_vq_intr(vsc, vioq->viq_txvq);
> vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1;
> + KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req);
> + if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) {
> + printf("inequal tx queue size %d: %d != %d\n", i,
> + vioq->viq_txvq->vq_num,
> + sc->sc_q[0].viq_txvq->vq_num);
> + goto err;
> + }
> + DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq,
> + vioq->viq_txvq);
> +
> + if (sc->sc_intrmap != NULL) {
> + vioq->viq_rxvq->vq_intr_vec = i + 2;
> + vioq->viq_txvq->vq_intr_vec = i + 2;
> + }
> }
>
> /* control queue */
> if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) {
> - sc->sc_ctl_vq = &vsc->sc_vqs[2];
> - if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1,
> - "control") != 0)
> + i = 2;
> + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
> + i = 2 * virtio_read_device_config_2(vsc,
> + VIRTIO_NET_CONFIG_MAX_QUEUES);
> + }
> + sc->sc_ctl_vq = &vsc->sc_vqs[i];
> + if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0)
> goto err;
> sc->sc_ctl_vq->vq_done = vio_ctrleof;
> + if (sc->sc_intrmap != NULL)
> + sc->sc_ctl_vq->vq_intr_vec = 1;
> virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
> }
>
> + if (sc->sc_intrmap) {
> + r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr,
> + vsc);
> + if (r != 0) {
> + printf("%s: cannot alloc config intr: %d\n",
> + sc->sc_dev.dv_xname, r);
> + goto err;
> + }
> + r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr,
> + sc->sc_ctl_vq);
> + if (r != 0) {
> + printf("%s: cannot alloc ctrl intr: %d\n",
> + sc->sc_dev.dv_xname, r);
> + goto err;
> + }
> + for (i = 0; i < sc->sc_nqueues; i++) {
> + struct cpu_info *ci = NULL;
> + ci = intrmap_cpu(sc->sc_intrmap, i);
> + r = virtio_intr_establish(vsc, va, i + 2, ci,
> + vio_queue_intr, &sc->sc_q[i]);
> + if (r != 0) {
> + printf("%s: cannot alloc q%d intr: %d\n",
> + sc->sc_dev.dv_xname, i, r);
> + goto err;
> + }
> + }
> + }
> +
> if (vio_alloc_mem(sc, tx_max_segments) < 0)
> goto err;
>
> @@ -760,6 +856,11 @@ vio_attach(struct device *parent, struct device *self, void *aux)
> if (virtio_attach_finish(vsc, va) != 0)
> goto err;
>
> + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
> + /* ctrl queue works only after DRIVER_OK */
> + vio_ctrl_mq(sc);
> + }
> +
> if_attach(ifp);
> ether_ifattach(ifp);
> vio_link_state(ifp);
> @@ -805,6 +906,33 @@ vio_link_state(struct ifnet *ifp)
> }
> }
>
> +/* interrupt handlers for multi-queue */
> +int
> +vio_queue_intr(void *arg)
> +{
> + struct vio_queue *vioq = arg;
> + struct virtio_softc *vsc = vioq->viq_sc->sc_virtio;
> + int r;
> + r = virtio_check_vq(vsc, vioq->viq_txvq);
> + r |= virtio_check_vq(vsc, vioq->viq_rxvq);
> + return r;
> +}
> +
> +int
> +vio_config_intr(void *arg)
> +{
> + struct virtio_softc *vsc = arg;
> + return vio_config_change(vsc);
> +}
> +
> +int
> +vio_ctrl_intr(void *arg)
> +{
> + struct virtqueue *vq = arg;
> + return virtio_check_vq(vq->vq_owner, vq);
> +}
> +
> +
> int
> vio_config_change(struct virtio_softc *vsc)
> {
> @@ -913,6 +1041,8 @@ vio_stop(struct ifnet *ifp, int disable)
> if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
> virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
> virtio_reinit_end(vsc);
> + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ))
> + vio_ctrl_mq(sc);
> if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
> vio_ctrl_wakeup(sc, FREE);
> }
> @@ -1137,6 +1267,33 @@ vio_dump(struct vio_softc *sc)
> }
> #endif
>
> +static int
> +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri)
> +{
> + struct if_rxring_info *ifrs, *ifr;
> + int error;
> + unsigned int i;
> +
> + ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
> + M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
> + if (ifrs == NULL)
> + return (ENOMEM);
> +
> + for (i = 0; i < sc->sc_nqueues; i++) {
> + ifr = &ifrs[i];
> +
> + ifr->ifr_size = sc->sc_rx_mbuf_size;
> + snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
> + ifr->ifr_info = sc->sc_q[i].viq_rxring;
> + }
> +
> + error = if_rxr_info_ioctl(ifri, i, ifrs);
> +
> + free(ifrs, M_TEMP, i * sizeof(*ifrs));
> +
> + return (error);
> +}
> +
> int
> vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> {
> @@ -1171,8 +1328,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
> r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
> break;
> case SIOCGIFRXR:
> - r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
> - NULL, sc->sc_rx_mbuf_size, &sc->sc_q[0].viq_rxring);
> + r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
> break;
> default:
> r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
> @@ -1666,6 +1822,8 @@ vio_ctrl_submit(struct vio_softc *sc, int slot)
> vio_ctrl_wakeup(sc, RESET);
> return ENXIO;
> }
> + if (cold)
> + virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq);
> }
>
> VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
> @@ -1723,6 +1881,41 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
> return r;
> }
>
> +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */
> +int
> +vio_ctrl_mq(struct vio_softc *sc)
> +{
> + struct virtio_softc *vsc = sc->sc_virtio;
> + struct virtqueue *vq = sc->sc_ctl_vq;
> + int r, slot;
> +
> +
> + r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ,
> + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot);
> + if (r != 0)
> + return r;
> +
> + sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues;
> +
> + vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs,
> + sizeof(*sc->sc_ctrl_mq_pairs), 1);
> +
> + r = vio_ctrl_submit(sc, slot);
> +
> + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs,
> + sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE);
> +
> + if (r != 0)
> + printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname,
> + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET);
> +
> + DPRINTF("%s: cmd %d %d: %d\n", __func__,
> + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r);
> +
> + vio_ctrl_finish(sc);
> + return r;
> +}
> +
> int
> vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
> {
> diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
> index 207e43ce9b8..a0727e76ee8 100644
> --- a/sys/dev/pv/virtiovar.h
> +++ b/sys/dev/pv/virtiovar.h
> @@ -165,6 +165,8 @@ struct virtio_ops {
> int (*attach_finish)(struct virtio_softc *, struct virtio_attach_args *);
> int (*poll_intr)(void *);
> void (*intr_barrier)(struct virtio_softc *);
> + int (*intr_establish)(struct virtio_softc *, struct virtio_attach_args *,
> + int, struct cpu_info *, int (*)(void *), void *);
> };
>
> #define VIRTIO_CHILD_ERROR ((void*)1)
> @@ -208,6 +210,14 @@ struct virtio_softc {
> #define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i)
> #define virtio_intr_barrier(sc) (sc)->sc_ops->intr_barrier(sc)
>
> +/*
> + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a
> + * child driver, the transport driver will skip automatic intr allocation and
> + * the child driver must allocate all required interrupts itself. Vector 0 is
> + * always used for the config change interrupt.
> + */
> +#define virtio_intr_establish(sc, va, v, ci, fn, a) (sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a)
> +
> /* only for transport drivers */
> #define virtio_device_reset(sc) virtio_set_status((sc), 0)
>