From: Alexander Bluhm Subject: Re: vio: Enable multiqueue To: Stefan Fritsch Cc: tech@openbsd.org Date: Mon, 13 Jan 2025 22:35:27 +0100 On Tue, Jan 07, 2025 at 09:26:55AM +0100, Stefan Fritsch wrote: > Hi, > > this diff finally enables multiqueue for vio(4). It goes on top of the > "virtio: Support unused virtqueues" diff from my previous mail. > > The distribution of packets to the enabled queues is not optimal. To > improve this, one would need the optional RSS (receive-side scaling) > feature which is difficult to configure with libvirt/qemu and therefore > usually not available on hypervisors. Things may improve with future > libvirt versions. RSS support is not included in this diff. But even > without RSS, we have seen some nice performance gains. > > We use a single interrupt vector for every rx/tx queue pair. With config > and control queue vectors, we need N+2 vectors for N queues. If > multi-queue is not available, the old scheme is used with either one > vector per virtqueue or one vector for all queues. > > * virtio: Add API to establish interrupts on specific cpus in child > drivers. Also make virtio_pci_setup_msix return proper errno. > > * virtio_pci: Increase max number of MSIX vectors > > * vio: Configure multiple queues and allocate proper interrupts. > > > I am not entirely happy with the API for establishing interrupts. But > there are several variants of how interrupts need to be handled (virtio pci > with or without MSIX, virtio mmio) and I want to keep those details from > the child drivers as far as possible. The way I have implemented it, only > the child drivers that need to allocate per-cpu interrupts need to deal > with the new API. If anyone has a better idea, I would be interested to > hear it. > > comments? ok? I have tested it on KVM, on vmd, and with SEV bounce buffers. With parallel send and receive of UDP packets it seems to be faster. But there is a lot of variation in the tests.
TCP has its bottleneck somewhere else, so there is no significant difference. OK bluhm@ > diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c > index da7f2c3bea3..3ca631dcca0 100644 > --- a/sys/dev/fdt/virtio_mmio.c > +++ b/sys/dev/fdt/virtio_mmio.c > @@ -105,6 +105,8 @@ int virtio_mmio_negotiate_features(struct virtio_softc *, > const struct virtio_feature_name *); > int virtio_mmio_intr(void *); > void virtio_mmio_intr_barrier(struct virtio_softc *); > +int virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *, > + int, struct cpu_info *, int (*)(void *), void *); > > struct virtio_mmio_softc { > struct virtio_softc sc_sc; > @@ -160,6 +162,7 @@ const struct virtio_ops virtio_mmio_ops = { > virtio_mmio_attach_finish, > virtio_mmio_intr, > virtio_mmio_intr_barrier, > + virtio_mmio_intr_establish, > }; > > uint16_t > @@ -546,3 +549,11 @@ virtio_mmio_intr_barrier(struct virtio_softc *vsc) > if (sc->sc_ih) > intr_barrier(sc->sc_ih); > } > + > +int > +virtio_mmio_intr_establish(struct virtio_softc *vsc, > + struct virtio_attach_args *va, int vec, struct cpu_info *ci, > + int (*func)(void *), void *arg) > +{ > + return ENXIO; > +} > diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c > index 8463f6223de..5f91d0ebe77 100644 > --- a/sys/dev/pci/virtio_pci.c > +++ b/sys/dev/pci/virtio_pci.c > @@ -50,7 +50,7 @@ > * XXX: PCI-endian while the device specific registers are native endian.
> */ > > -#define MAX_MSIX_VECS 8 > +#define MAX_MSIX_VECS 16 > > struct virtio_pci_softc; > struct virtio_pci_attach_args; > @@ -62,7 +62,7 @@ int virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p > int virtio_pci_detach(struct device *, int); > > void virtio_pci_kick(struct virtio_softc *, uint16_t); > -int virtio_pci_adjust_config_region(struct virtio_pci_softc *); > +int virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset); > uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int); > uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int); > uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int); > @@ -81,9 +81,10 @@ int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe > int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *); > void virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t); > void virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t); > -int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *); > +int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *); > int virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int); > void virtio_pci_intr_barrier(struct virtio_softc *); > +int virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *); > void virtio_pci_free_irqs(struct virtio_pci_softc *); > int virtio_pci_poll_intr(void *); > int virtio_pci_legacy_intr(void *); > @@ -100,6 +101,7 @@ enum irq_type { > IRQ_NO_MSIX, > IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */ > IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */ > + IRQ_MSIX_CHILD, /* assigned by child driver */ > }; > > struct 
virtio_pci_intr { > @@ -179,6 +181,7 @@ const struct virtio_ops virtio_pci_ops = { > virtio_pci_attach_finish, > virtio_pci_poll_intr, > virtio_pci_intr_barrier, > + virtio_pci_intr_establish, > }; > > static inline uint64_t > @@ -648,10 +651,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) > goto free; > } > > - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; > sc->sc_irq_type = IRQ_NO_MSIX; > - if (virtio_pci_adjust_config_region(sc) != 0) > - goto err; > + if (virtio_pci_adjust_config_region(sc, > + VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0) > + { > + goto free; > + } > > virtio_device_reset(vsc); > virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK); > @@ -692,7 +697,9 @@ virtio_pci_attach_finish(struct virtio_softc *vsc, > pci_chipset_tag_t pc = vpa->vpa_pa->pa_pc; > char const *intrstr; > > - if (virtio_pci_setup_msix(sc, vpa, 0) == 0) { > + if (sc->sc_irq_type == IRQ_MSIX_CHILD) { > + intrstr = "msix"; > + } else if (virtio_pci_setup_msix(sc, vpa, 0) == 0) { > sc->sc_irq_type = IRQ_MSIX_PER_VQ; > intrstr = "msix per-VQ"; > } else if (virtio_pci_setup_msix(sc, vpa, 1) == 0) { > @@ -754,11 +761,14 @@ virtio_pci_detach(struct device *self, int flags) > } > > int > -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc) > +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset) > { > if (sc->sc_sc.sc_version_1) > return 0; > - sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset; > + if (sc->sc_devcfg_offset == offset) > + return 0; > + sc->sc_devcfg_offset = offset; > + sc->sc_devcfg_iosize = sc->sc_iosize - offset; > sc->sc_devcfg_iot = sc->sc_iot; > if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset, > sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) { > @@ -958,30 +968,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc, > > int > virtio_pci_msix_establish(struct virtio_pci_softc *sc, > - struct virtio_pci_attach_args *vpa, int idx, > + struct 
virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci, > int (*handler)(void *), void *ih_arg) > { > struct virtio_softc *vsc = &sc->sc_sc; > pci_intr_handle_t ih; > + int r; > > KASSERT(idx < sc->sc_nintr); > > - if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) { > + r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih); > + if (r != 0) { > #if VIRTIO_DEBUG > printf("%s[%d]: pci_intr_map_msix failed\n", > vsc->sc_dev.dv_xname, idx); > #endif > - return 1; > + return r; > } > snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d", > vsc->sc_child->dv_xname, idx); > - sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl, > - handler, ih_arg, sc->sc_intr[idx].name); > + sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl, > + ci, handler, ih_arg, sc->sc_intr[idx].name); > if (sc->sc_intr[idx].ih == NULL) { > printf("%s[%d]: couldn't establish msix interrupt\n", > - vsc->sc_dev.dv_xname, idx); > - return 1; > + vsc->sc_child->dv_xname, idx); > + return ENOMEM; > } > + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI); > return 0; > } > > @@ -1031,8 +1044,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc) > } > } > > - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; > - virtio_pci_adjust_config_region(sc); > + /* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? 
*/ > + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI); > } > > int > @@ -1040,34 +1053,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc, > struct virtio_pci_attach_args *vpa, int shared) > { > struct virtio_softc *vsc = &sc->sc_sc; > - int i; > + int i, r = 0; > > /* Shared needs config + queue */ > if (shared && vpa->vpa_va.va_nintr < 1 + 1) > - return 1; > + return ERANGE; > /* Per VQ needs config + N * queue */ > if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs) > - return 1; > + return ERANGE; > > - if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc)) > - return 1; > - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI; > - virtio_pci_adjust_config_region(sc); > + r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc); > + if (r != 0) > + return r; > > if (shared) { > - if (virtio_pci_msix_establish(sc, vpa, 1, > - virtio_pci_shared_queue_intr, vsc)) { > + r = virtio_pci_msix_establish(sc, vpa, 1, NULL, > + virtio_pci_shared_queue_intr, vsc); > + if (r != 0) > goto fail; > - } > > for (i = 0; i < vsc->sc_nvqs; i++) > vsc->sc_vqs[i].vq_intr_vec = 1; > } else { > for (i = 0; i < vsc->sc_nvqs; i++) { > - if (virtio_pci_msix_establish(sc, vpa, i + 1, > - virtio_pci_queue_intr, &vsc->sc_vqs[i])) { > + r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL, > + virtio_pci_queue_intr, &vsc->sc_vqs[i]); > + if (r != 0) > goto fail; > - } > vsc->sc_vqs[i].vq_intr_vec = i + 1; > } > } > @@ -1075,7 +1087,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc, > return 0; > fail: > virtio_pci_free_irqs(sc); > - return 1; > + return r; > +} > + > +int > +virtio_pci_intr_establish(struct virtio_softc *vsc, > + struct virtio_attach_args *va, int vec, struct cpu_info *ci, > + int (*func)(void *), void *arg) > +{ > + struct virtio_pci_attach_args *vpa; > + struct virtio_pci_softc *sc; > + > + if (vsc->sc_ops != &virtio_pci_ops) > + return ENXIO; > + > + vpa = (struct virtio_pci_attach_args *)va; > + 
sc = (struct virtio_pci_softc *)vsc; > + > + if (vec >= sc->sc_nintr || sc->sc_nintr <= 1) > + return ERANGE; > + > + sc->sc_irq_type = IRQ_MSIX_CHILD; > + return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg); > } > > void > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c > index a728940e314..20d1bfc1ca8 100644 > --- a/sys/dev/pv/if_vio.c > +++ b/sys/dev/pv/if_vio.c > @@ -32,8 +32,10 @@ > #include > #include > #include > +#include > #include > #include > +#include /* for CACHELINESIZE */ > #include > #include > > @@ -64,8 +66,15 @@ > * if_vioreg.h: > */ > /* Configuration registers */ > -#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ > -#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ > +#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 byte */ > +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16 bit */ > +#define VIRTIO_NET_CONFIG_MAX_QUEUES 8 /* 16 bit */ > +#define VIRTIO_NET_CONFIG_MTU 10 /* 16 bit */ > +#define VIRTIO_NET_CONFIG_SPEED 12 /* 32 bit */ > +#define VIRTIO_NET_CONFIG_DUPLEX 16 /* 8 bit */ > +#define VIRTIO_NET_CONFIG_RSS_SIZE 17 /* 8 bit */ > +#define VIRTIO_NET_CONFIG_RSS_LEN 18 /* 16 bit */ > +#define VIRTIO_NET_CONFIG_HASH_TYPES 20 /* 16 bit */ > > /* Feature bits */ > #define VIRTIO_NET_F_CSUM (1ULL<<0) > @@ -183,6 +192,11 @@ struct virtio_net_ctrl_cmd { > # define VIRTIO_NET_CTRL_VLAN_ADD 0 > # define VIRTIO_NET_CTRL_VLAN_DEL 1 > > +#define VIRTIO_NET_CTRL_MQ 4 > +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 > +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 > +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 > + > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > @@ -196,6 +210,12 @@ struct virtio_net_ctrl_rx { > uint8_t onoff; > } __packed; > > +struct virtio_net_ctrl_mq_pairs_set { > + uint16_t virtqueue_pairs; > +}; > +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 > +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 > + > struct virtio_net_ctrl_guest_offloads { > uint64_t offloads; > } __packed; > @@ -231,7 +251,7 
@@ struct vio_queue { > struct virtqueue *viq_txvq; > struct mutex viq_txmtx, viq_rxmtx; > int viq_txfree_slots; > -}; > +} __aligned(CACHELINESIZE); > > struct vio_softc { > struct device sc_dev; > @@ -251,14 +271,16 @@ struct vio_softc { > caddr_t sc_dma_kva; > > int sc_hdr_size; > - struct virtio_net_ctrl_cmd *sc_ctrl_cmd; > - struct virtio_net_ctrl_status *sc_ctrl_status; > - struct virtio_net_ctrl_rx *sc_ctrl_rx; > - struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; > - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; > + struct virtio_net_ctrl_cmd *sc_ctrl_cmd; > + struct virtio_net_ctrl_status *sc_ctrl_status; > + struct virtio_net_ctrl_rx *sc_ctrl_rx; > + struct virtio_net_ctrl_mq_pairs_set *sc_ctrl_mq_pairs; > + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; > + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; > #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc > - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; > + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; > > + struct intrmap *sc_intrmap; > struct vio_queue *sc_q; > uint16_t sc_nqueues; > int sc_tx_slots_per_req; > @@ -317,10 +339,15 @@ void vio_tx_drain(struct vio_softc *); > int vio_encap(struct vio_queue *, int, struct mbuf *); > void vio_txtick(void *); > > +int vio_queue_intr(void *); > +int vio_config_intr(void *); > +int vio_ctrl_intr(void *); > + > /* other control */ > void vio_link_state(struct ifnet *); > int vio_config_change(struct virtio_softc *); > int vio_ctrl_rx(struct vio_softc *, int, int); > +int vio_ctrl_mq(struct vio_softc *); > int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); > int vio_set_rx_filter(struct vio_softc *); > void vio_iff(struct vio_softc *); > @@ -408,6 +435,8 @@ vio_free_dmamem(struct vio_softc *sc) > * sc_ctrl_status: return value for a command via ctrl vq (READ) > * sc_ctrl_rx: parameter for a VIRTIO_NET_CTRL_RX class command > * (WRITE) > + * sc_ctrl_mq_pairs_set: set number of rx/tx queue pais (WRITE) > + * 
sc_ctrl_guest_offloads: configure offload features (WRITE) > * sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC > * class command (WRITE) > * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC > @@ -449,6 +478,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) > allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; > allocsize += sizeof(struct virtio_net_ctrl_status) * 1; > allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; > + allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1; > allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; > allocsize += VIO_CTRL_MAC_INFO_SIZE; > } > @@ -474,6 +504,8 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) > offset += sizeof(*sc->sc_ctrl_status); > sc->sc_ctrl_rx = (void *)(kva + offset); > offset += sizeof(*sc->sc_ctrl_rx); > + sc->sc_ctrl_mq_pairs = (void *)(kva + offset); > + offset += sizeof(*sc->sc_ctrl_mq_pairs); > sc->sc_ctrl_guest_offloads = (void *)(kva + offset); > offset += sizeof(*sc->sc_ctrl_guest_offloads); > sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset); > @@ -598,7 +630,7 @@ vio_attach(struct device *parent, struct device *self, void *aux) > struct vio_softc *sc = (struct vio_softc *)self; > struct virtio_softc *vsc = (struct virtio_softc *)parent; > struct virtio_attach_args *va = aux; > - int i, tx_max_segments; > + int i, r, tx_max_segments; > struct ifnet *ifp = &sc->sc_ac.ac_if; > > if (vsc->sc_child != NULL) { > @@ -616,6 +648,9 @@ vio_attach(struct device *parent, struct device *self, void *aux) > VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM | > VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM; > > + if (va->va_nintr > 3 && ncpus > 1) > + vsc->sc_driver_features |= VIRTIO_NET_F_MQ; > + > vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; > vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; > > @@ -626,10 +661,23 @@ vio_attach(struct device *parent, struct device *self, void *aux) > if (virtio_negotiate_features(vsc, 
virtio_net_feature_names) != 0) > goto err; > > - sc->sc_nqueues = 1; > - vsc->sc_nvqs = 2 * sc->sc_nqueues; > - if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) > - vsc->sc_nvqs++; > + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { > + i = virtio_read_device_config_2(vsc, > + VIRTIO_NET_CONFIG_MAX_QUEUES); > + vsc->sc_nvqs = 2 * i + 1; > + i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX); > + sc->sc_intrmap = intrmap_create(&sc->sc_dev, i, > + va->va_nintr - 2, 0); > + sc->sc_nqueues = intrmap_count(sc->sc_intrmap); > + printf(": %u queue%s", sc->sc_nqueues, > + sc->sc_nqueues > 1 ? "s" : ""); > + } else { > + sc->sc_nqueues = 1; > + printf(": 1 queue"); > + vsc->sc_nvqs = 2; > + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) > + vsc->sc_nvqs++; > + } > > vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF, > M_WAITOK|M_ZERO); > @@ -729,18 +777,66 @@ vio_attach(struct device *parent, struct device *self, void *aux) > else > virtio_stop_vq_intr(vsc, vioq->viq_txvq); > vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1; > + KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req); > + if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) { > + printf("inequal tx queue size %d: %d != %d\n", i, > + vioq->viq_txvq->vq_num, > + sc->sc_q[0].viq_txvq->vq_num); > + goto err; > + } > + DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq, > + vioq->viq_txvq); > + > + if (sc->sc_intrmap != NULL) { > + vioq->viq_rxvq->vq_intr_vec = i + 2; > + vioq->viq_txvq->vq_intr_vec = i + 2; > + } > } > > /* control queue */ > if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { > - sc->sc_ctl_vq = &vsc->sc_vqs[2]; > - if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1, > - "control") != 0) > + i = 2; > + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { > + i = 2 * virtio_read_device_config_2(vsc, > + VIRTIO_NET_CONFIG_MAX_QUEUES); > + } > + sc->sc_ctl_vq = &vsc->sc_vqs[i]; > + if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0) > goto err; > 
sc->sc_ctl_vq->vq_done = vio_ctrleof; > + if (sc->sc_intrmap != NULL) > + sc->sc_ctl_vq->vq_intr_vec = 1; > virtio_start_vq_intr(vsc, sc->sc_ctl_vq); > } > > + if (sc->sc_intrmap) { > + r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, > + vsc); > + if (r != 0) { > + printf("%s: cannot alloc config intr: %d\n", > + sc->sc_dev.dv_xname, r); > + goto err; > + } > + r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, > + sc->sc_ctl_vq); > + if (r != 0) { > + printf("%s: cannot alloc ctrl intr: %d\n", > + sc->sc_dev.dv_xname, r); > + goto err; > + } > + for (i = 0; i < sc->sc_nqueues; i++) { > + struct cpu_info *ci = NULL; > + ci = intrmap_cpu(sc->sc_intrmap, i); > + r = virtio_intr_establish(vsc, va, i + 2, ci, > + vio_queue_intr, &sc->sc_q[i]); > + if (r != 0) { > + printf("%s: cannot alloc q%d intr: %d\n", > + sc->sc_dev.dv_xname, i, r); > + goto err; > + } > + } > + } > + > if (vio_alloc_mem(sc, tx_max_segments) < 0) > goto err; > > @@ -760,6 +856,11 @@ vio_attach(struct device *parent, struct device *self, void *aux) > if (virtio_attach_finish(vsc, va) != 0) > goto err; > > + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { > + /* ctrl queue works only after DRIVER_OK */ > + vio_ctrl_mq(sc); > + } > + > if_attach(ifp); > ether_ifattach(ifp); > vio_link_state(ifp); > @@ -805,6 +906,33 @@ vio_link_state(struct ifnet *ifp) > } > } > > +/* interrupt handlers for multi-queue */ > +int > +vio_queue_intr(void *arg) > +{ > + struct vio_queue *vioq = arg; > + struct virtio_softc *vsc = vioq->viq_sc->sc_virtio; > + int r; > + r = virtio_check_vq(vsc, vioq->viq_txvq); > + r |= virtio_check_vq(vsc, vioq->viq_rxvq); > + return r; > +} > + > +int > +vio_config_intr(void *arg) > +{ > + struct virtio_softc *vsc = arg; > + return vio_config_change(vsc); > +} > + > +int > +vio_ctrl_intr(void *arg) > +{ > + struct virtqueue *vq = arg; > + return virtio_check_vq(vq->vq_owner, vq); > +} > + > + > int > vio_config_change(struct virtio_softc *vsc) > { > @@ -913,6 
+1041,8 @@ vio_stop(struct ifnet *ifp, int disable) > if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) > virtio_start_vq_intr(vsc, sc->sc_ctl_vq); > virtio_reinit_end(vsc); > + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) > + vio_ctrl_mq(sc); > if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) > vio_ctrl_wakeup(sc, FREE); > } > @@ -1137,6 +1267,33 @@ vio_dump(struct vio_softc *sc) > } > #endif > > +static int > +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri) > +{ > + struct if_rxring_info *ifrs, *ifr; > + int error; > + unsigned int i; > + > + ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs), > + M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL); > + if (ifrs == NULL) > + return (ENOMEM); > + > + for (i = 0; i < sc->sc_nqueues; i++) { > + ifr = &ifrs[i]; > + > + ifr->ifr_size = sc->sc_rx_mbuf_size; > + snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i); > + ifr->ifr_info = sc->sc_q[i].viq_rxring; > + } > + > + error = if_rxr_info_ioctl(ifri, i, ifrs); > + > + free(ifrs, M_TEMP, i * sizeof(*ifrs)); > + > + return (error); > +} > + > int > vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) > { > @@ -1171,8 +1328,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) > r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); > break; > case SIOCGIFRXR: > - r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data, > - NULL, sc->sc_rx_mbuf_size, &sc->sc_q[0].viq_rxring); > + r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data); > break; > default: > r = ether_ioctl(ifp, &sc->sc_ac, cmd, data); > @@ -1666,6 +1822,8 @@ vio_ctrl_submit(struct vio_softc *sc, int slot) > vio_ctrl_wakeup(sc, RESET); > return ENXIO; > } > + if (cold) > + virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq); > } > > VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, > @@ -1723,6 +1881,41 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) > return r; > } > > +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */ > +int > +vio_ctrl_mq(struct vio_softc *sc) > +{ > + struct 
virtio_softc *vsc = sc->sc_virtio; > + struct virtqueue *vq = sc->sc_ctl_vq; > + int r, slot; > + > + > + r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ, > + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot); > + if (r != 0) > + return r; > + > + sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues; > + > + vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs, > + sizeof(*sc->sc_ctrl_mq_pairs), 1); > + > + r = vio_ctrl_submit(sc, slot); > + > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs, > + sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE); > + > + if (r != 0) > + printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, > + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET); > + > + DPRINTF("%s: cmd %d %d: %d\n", __func__, > + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r); > + > + vio_ctrl_finish(sc); > + return r; > +} > + > int > vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) > { > diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h > index 207e43ce9b8..a0727e76ee8 100644 > --- a/sys/dev/pv/virtiovar.h > +++ b/sys/dev/pv/virtiovar.h > @@ -165,6 +165,8 @@ struct virtio_ops { > int (*attach_finish)(struct virtio_softc *, struct virtio_attach_args *); > int (*poll_intr)(void *); > void (*intr_barrier)(struct virtio_softc *); > + int (*intr_establish)(struct virtio_softc *, struct virtio_attach_args *, > + int, struct cpu_info *, int (*)(void *), void *); > }; > > #define VIRTIO_CHILD_ERROR ((void*)1) > @@ -208,6 +210,14 @@ struct virtio_softc { > #define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i) > #define virtio_intr_barrier(sc) (sc)->sc_ops->intr_barrier(sc) > > +/* > + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a > + * child driver, the transport driver will skip automatic intr allocation and > + * the child driver must allocate all required interrupts itself. Vector 0 is > + * always used for the config change interrupt. 
> + */ > +#define virtio_intr_establish(sc, va, v, ci, fn, a) (sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a) > + > /* only for transport drivers */ > #define virtio_device_reset(sc) virtio_set_status((sc), 0) >