From: Stefan Fritsch Subject: Re: vio(4) multi-queue To: tech@openbsd.org Date: Wed, 28 Aug 2024 14:53:36 +0200 Hi, new diff V3 below, updated for commits in -current and with some minor cleanup. Cheers, Stefan diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c index 212614f98ff..1e2d16a2257 100644 --- a/sys/dev/fdt/virtio_mmio.c +++ b/sys/dev/fdt/virtio_mmio.c @@ -97,11 +97,15 @@ void virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t); void virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t); uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); void virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t); +void virtio_mmio_setup_intrs(struct virtio_softc *); int virtio_mmio_get_status(struct virtio_softc *); void virtio_mmio_set_status(struct virtio_softc *, int); int virtio_mmio_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *); int virtio_mmio_intr(void *); +int virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *, + int, struct cpu_info *, int (*)(void *), void *); + struct virtio_mmio_softc { struct virtio_softc sc_sc; @@ -145,10 +149,12 @@ const struct virtio_ops virtio_mmio_ops = { virtio_mmio_write_device_config_8, virtio_mmio_read_queue_size, virtio_mmio_setup_queue, + virtio_mmio_setup_intrs, virtio_mmio_get_status, virtio_mmio_set_status, virtio_mmio_negotiate_features, virtio_mmio_intr, + virtio_mmio_intr_establish, }; uint16_t @@ -196,6 +202,11 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq, } } +void +virtio_mmio_setup_intrs(struct virtio_softc *vsc) +{ +} + int virtio_mmio_get_status(struct virtio_softc *vsc) { @@ -515,3 +526,11 @@ virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx) bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NOTIFY, idx); } + +int +virtio_mmio_intr_establish(struct virtio_softc *vsc, + struct virtio_attach_args *va, int vec, struct 
cpu_info *ci, + int (*func)(void *), void *arg) +{ + return ENXIO; +} diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c index f7b2acda17b..5280b237ef5 100644 --- a/sys/dev/pci/virtio_pci.c +++ b/sys/dev/pci/virtio_pci.c @@ -50,7 +50,7 @@ * XXX: PCI-endian while the device specific registers are native endian. */ -#define MAX_MSIX_VECS 8 +#define MAX_MSIX_VECS 16 struct virtio_pci_softc; struct virtio_pci_attach_args; @@ -62,7 +62,7 @@ int virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p int virtio_pci_detach(struct device *, int); void virtio_pci_kick(struct virtio_softc *, uint16_t); -int virtio_pci_adjust_config_region(struct virtio_pci_softc *); +int virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset); uint8_t virtio_pci_read_device_config_1(struct virtio_softc *, int); uint16_t virtio_pci_read_device_config_2(struct virtio_softc *, int); uint32_t virtio_pci_read_device_config_4(struct virtio_softc *, int); @@ -73,14 +73,16 @@ void virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t); void virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t); uint16_t virtio_pci_read_queue_size(struct virtio_softc *, uint16_t); void virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t); +void virtio_pci_setup_intrs(struct virtio_softc *); int virtio_pci_get_status(struct virtio_softc *); void virtio_pci_set_status(struct virtio_softc *, int); int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *); int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *); void virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t); void virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t); -int virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *); +int virtio_pci_msix_establish(struct 
virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *); int virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int); +int virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *); void virtio_pci_free_irqs(struct virtio_pci_softc *); int virtio_pci_poll_intr(void *); int virtio_pci_legacy_intr(void *); @@ -97,6 +99,12 @@ enum irq_type { IRQ_NO_MSIX, IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */ IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */ + IRQ_MSIX_CHILD, /* assigned by child driver */ +}; + +struct virtio_pci_intr { + char name[16]; + void *ih; }; struct virtio_pci_softc { @@ -132,7 +140,8 @@ struct virtio_pci_softc { bus_space_handle_t sc_isr_ioh; bus_size_t sc_isr_iosize; - void *sc_ih[MAX_MSIX_VECS]; + struct virtio_pci_intr *sc_intr; + int sc_nintr; enum irq_type sc_irq_type; }; @@ -163,10 +172,12 @@ const struct virtio_ops virtio_pci_ops = { virtio_pci_write_device_config_8, virtio_pci_read_queue_size, virtio_pci_setup_queue, + virtio_pci_setup_intrs, virtio_pci_get_status, virtio_pci_set_status, virtio_pci_negotiate_features, virtio_pci_poll_intr, + virtio_pci_intr_establish, }; static inline uint64_t @@ -265,25 +276,26 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq, bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE); } +} - /* - * This path is only executed if this function is called after - * the child's attach function has finished. In other cases, - * it's done in virtio_pci_setup_msix(). 
- */ - if (sc->sc_irq_type != IRQ_NO_MSIX) { - int vec = 1; - if (sc->sc_irq_type == IRQ_MSIX_PER_VQ) - vec += vq->vq_index; - if (sc->sc_sc.sc_version_1) { - CWRITE(sc, queue_msix_vector, vec); - } else { - bus_space_write_2(sc->sc_iot, sc->sc_ioh, - VIRTIO_MSI_QUEUE_VECTOR, vec); - } +void +virtio_pci_setup_intrs(struct virtio_softc *vsc) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + int i; + + if (sc->sc_irq_type == IRQ_NO_MSIX) + return; + + for (i = 0; i < vsc->sc_nvqs; i++) { + unsigned vec = vsc->sc_vqs[i].vq_intr_vec; + virtio_pci_set_msix_queue_vector(sc, i, vec); } + if (vsc->sc_config_change) + virtio_pci_set_msix_config_vector(sc, 0); } + int virtio_pci_get_status(struct virtio_softc *vsc) { @@ -585,7 +597,6 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) char const *intrstr; pci_intr_handle_t ih; struct virtio_pci_attach_args vpa = { { 0 }, pa }; - int n; revision = PCI_REVISION(pa->pa_class); switch (revision) { @@ -617,9 +628,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) virtio_pci_dump_caps(sc); #endif - n = MIN(MAX_MSIX_VECS, pci_intr_msix_count(pa)); - n = MAX(n, 1); - vpa.vpa_va.va_nintr = n; + sc->sc_nintr = MIN(MAX_MSIX_VECS, pci_intr_msix_count(pa)); + sc->sc_nintr = MAX(sc->sc_nintr, 1); + vpa.vpa_va.va_nintr = sc->sc_nintr; + + sc->sc_intr = mallocarray(sc->sc_nintr, sizeof(*sc->sc_intr), + M_DEVBUF, M_NOWAIT | M_ZERO); vsc->sc_ops = &virtio_pci_ops; if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 && @@ -633,13 +647,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) } if (ret != 0) { printf(": Cannot attach (%d)\n", ret); - return; + goto fail_0; } - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; sc->sc_irq_type = IRQ_NO_MSIX; - if (virtio_pci_adjust_config_region(sc) != 0) - return; + if (virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0) + goto fail_0; 
virtio_device_reset(vsc); virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK); @@ -660,7 +673,9 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) goto fail_1; } - if (virtio_pci_setup_msix(sc, &vpa, 0) == 0) { + if (sc->sc_irq_type == IRQ_MSIX_CHILD) { + intrstr = "msix"; + } else if (virtio_pci_setup_msix(sc, &vpa, 0) == 0) { sc->sc_irq_type = IRQ_MSIX_PER_VQ; intrstr = "msix per-VQ"; } else if (virtio_pci_setup_msix(sc, &vpa, 1) == 0) { @@ -680,9 +695,9 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) */ if (vsc->sc_ipl & IPL_MPSAFE) ih_func = virtio_pci_legacy_intr_mpsafe; - sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE, - ih_func, sc, vsc->sc_dev.dv_xname); - if (sc->sc_ih[0] == NULL) { + sc->sc_intr[0].ih = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE, + ih_func, sc, vsc->sc_child->dv_xname); + if (sc->sc_intr[0].ih == NULL) { printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname); if (intrstr != NULL) printf(" at %s", intrstr); @@ -690,6 +705,7 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux) goto fail_2; } } + virtio_pci_setup_intrs(vsc); printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr); return; @@ -699,6 +715,8 @@ fail_2: fail_1: /* no pci_mapreg_unmap() or pci_intr_unmap() */ virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED); +fail_0: + free(sc->sc_intr, M_DEVBUF, sc->sc_nintr * sizeof(*sc->sc_intr)); } int @@ -724,11 +742,14 @@ virtio_pci_detach(struct device *self, int flags) } int -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc) +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset) { if (sc->sc_sc.sc_version_1) return 0; - sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset; + if (sc->sc_devcfg_offset == offset) + return 0; + sc->sc_devcfg_offset = offset; + sc->sc_devcfg_iosize = sc->sc_iosize - offset; sc->sc_devcfg_iot = sc->sc_iot; if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, 
sc->sc_devcfg_offset, sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) { @@ -923,26 +944,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc, int virtio_pci_msix_establish(struct virtio_pci_softc *sc, - struct virtio_pci_attach_args *vpa, int idx, + struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci, int (*handler)(void *), void *ih_arg) { struct virtio_softc *vsc = &sc->sc_sc; pci_intr_handle_t ih; + int r; - if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) { + KASSERT(idx < sc->sc_nintr); + + r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih); + if (r != 0) { #if VIRTIO_DEBUG printf("%s[%d]: pci_intr_map_msix failed\n", vsc->sc_dev.dv_xname, idx); #endif - return 1; + return r; } - sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl, - handler, ih_arg, vsc->sc_dev.dv_xname); - if (sc->sc_ih[idx] == NULL) { + snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d", + vsc->sc_child->dv_xname, idx); + sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl, + ci, handler, ih_arg, sc->sc_intr[idx].name); + if (sc->sc_intr[idx].ih == NULL) { printf("%s[%d]: couldn't establish msix interrupt\n", - vsc->sc_dev.dv_xname, idx); - return 1; + vsc->sc_child->dv_xname, idx); + return ENOMEM; } + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI); return 0; } @@ -985,15 +1013,15 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc) } } - for (i = 0; i < MAX_MSIX_VECS; i++) { - if (sc->sc_ih[i]) { - pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]); - sc->sc_ih[i] = NULL; + for (i = 0; i < sc->sc_nintr; i++) { + if (sc->sc_intr[i].ih) { + pci_intr_disestablish(sc->sc_pc, sc->sc_intr[i].ih); + sc->sc_intr[i].ih = NULL; } } - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI; - virtio_pci_adjust_config_region(sc); + /* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? 
*/ + virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI); } int @@ -1001,43 +1029,62 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct virtio_pci_attach_args *vpa, int shared) { struct virtio_softc *vsc = &sc->sc_sc; - int i; + int i, r = 0; /* Shared needs config + queue */ if (shared && vpa->vpa_va.va_nintr < 1 + 1) - return 1; + return ERANGE; /* Per VQ needs config + N * queue */ if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs) - return 1; + return ERANGE; - if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc)) - return 1; - sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI; - virtio_pci_adjust_config_region(sc); - virtio_pci_set_msix_config_vector(sc, 0); + r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc); + if (r != 0) + return r; if (shared) { - if (virtio_pci_msix_establish(sc, vpa, 1, - virtio_pci_shared_queue_intr, vsc)) { + r = virtio_pci_msix_establish(sc, vpa, 1, NULL, + virtio_pci_shared_queue_intr, vsc); + if (r != 0) goto fail; - } for (i = 0; i < vsc->sc_nvqs; i++) - virtio_pci_set_msix_queue_vector(sc, i, 1); + vsc->sc_vqs[i].vq_intr_vec = 1; } else { for (i = 0; i < vsc->sc_nvqs; i++) { - if (virtio_pci_msix_establish(sc, vpa, i + 1, - virtio_pci_queue_intr, &vsc->sc_vqs[i])) { + r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL, + virtio_pci_queue_intr, &vsc->sc_vqs[i]); + if (r != 0) goto fail; - } - virtio_pci_set_msix_queue_vector(sc, i, i + 1); + vsc->sc_vqs[i].vq_intr_vec = i + 1; } } return 0; fail: virtio_pci_free_irqs(sc); - return 1; + return r; +} + +int +virtio_pci_intr_establish(struct virtio_softc *vsc, + struct virtio_attach_args *va, int vec, struct cpu_info *ci, + int (*func)(void *), void *arg) +{ + struct virtio_pci_attach_args *vpa; + struct virtio_pci_softc *sc; + + if (vsc->sc_ops != &virtio_pci_ops) + return ENXIO; + + vpa = (struct virtio_pci_attach_args *)va; + sc = (struct virtio_pci_softc *)vsc; + + if (vec >= sc->sc_nintr || 
sc->sc_nintr <= 1) + return ERANGE; + + sc->sc_irq_type = IRQ_MSIX_CHILD; + return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg); } /* diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c index 5916f14fde8..45a50e8df80 100644 --- a/sys/dev/pv/if_vio.c +++ b/sys/dev/pv/if_vio.c @@ -32,7 +32,9 @@ #include #include #include +#include #include +#include #include #include @@ -63,8 +65,15 @@ * if_vioreg.h: */ /* Configuration registers */ -#define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */ -#define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */ +#define VIRTIO_NET_CONFIG_MAC 0 /* 8 bit x 6 byte */ +#define VIRTIO_NET_CONFIG_STATUS 6 /* 16 bit */ +#define VIRTIO_NET_CONFIG_MAX_QUEUES 8 /* 16 bit */ +#define VIRTIO_NET_CONFIG_MTU 10 /* 16 bit */ +#define VIRTIO_NET_CONFIG_SPEED 12 /* 32 bit */ +#define VIRTIO_NET_CONFIG_DUPLEX 16 /* 8 bit */ +#define VIRTIO_NET_CONFIG_RSS_SIZE 17 /* 8 bit */ +#define VIRTIO_NET_CONFIG_RSS_LEN 18 /* 16 bit */ +#define VIRTIO_NET_CONFIG_HASH_TYPES 20 /* 16 bit */ /* Feature bits */ #define VIRTIO_NET_F_CSUM (1ULL<<0) @@ -182,6 +191,11 @@ struct virtio_net_ctrl_cmd { # define VIRTIO_NET_CTRL_VLAN_ADD 0 # define VIRTIO_NET_CTRL_VLAN_DEL 1 +#define VIRTIO_NET_CTRL_MQ 4 +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG 2 + #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 @@ -195,6 +209,12 @@ struct virtio_net_ctrl_rx { uint8_t onoff; } __packed; +struct virtio_net_ctrl_mq_pairs_set { + uint16_t virtqueue_pairs; +}; +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + struct virtio_net_ctrl_guest_offloads { uint64_t offloads; } __packed; @@ -215,14 +235,28 @@ enum vio_ctrl_state { FREE, INUSE, DONE, RESET }; +struct vio_queue { + struct vio_softc *sc; + struct virtio_net_hdr *tx_hdrs; + bus_dmamap_t *arrays; +#define rx_dmamaps arrays + bus_dmamap_t *tx_dmamaps; + struct mbuf **rx_mbufs; + 
struct mbuf **tx_mbufs; + struct if_rxring rx_ring; + struct ifiqueue *ifiq; + struct ifqueue *ifq; + struct virtqueue *rx_vq; + struct virtqueue *tx_vq; + struct mutex tx_mtx, rx_mtx; + int tx_free_slots; +} __aligned(64); + struct vio_softc { struct device sc_dev; struct virtio_softc *sc_virtio; -#define VQRX 0 -#define VQTX 1 -#define VQCTL 2 - struct virtqueue sc_vq[3]; + struct virtqueue *sc_ctl_vq; struct arpcom sc_ac; struct ifmedia sc_media; @@ -236,22 +270,19 @@ struct vio_softc { caddr_t sc_dma_kva; int sc_hdr_size; - struct virtio_net_hdr *sc_tx_hdrs; - struct virtio_net_ctrl_cmd *sc_ctrl_cmd; - struct virtio_net_ctrl_status *sc_ctrl_status; - struct virtio_net_ctrl_rx *sc_ctrl_rx; - struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; + struct virtio_net_ctrl_cmd *sc_ctrl_cmd; + struct virtio_net_ctrl_status *sc_ctrl_status; + struct virtio_net_ctrl_rx *sc_ctrl_rx; + struct virtio_net_ctrl_mq_pairs_set *sc_ctrl_mq_pairs; + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc - struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; + struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; - /* kmem */ - bus_dmamap_t *sc_arrays; -#define sc_rx_dmamaps sc_arrays - bus_dmamap_t *sc_tx_dmamaps; - struct mbuf **sc_rx_mbufs; - struct mbuf **sc_tx_mbufs; - struct if_rxring sc_rx_ring; + struct intrmap *sc_intrmap; + struct vio_queue *sc_q; + uint16_t sc_nqueues; + int sc_tx_slots_per_req; enum vio_ctrl_state sc_ctrl_inuse; @@ -285,31 +316,37 @@ void vio_attach(struct device *, struct device *, void *); /* ifnet interface functions */ int vio_init(struct ifnet *); void vio_stop(struct ifnet *, int); -void vio_start(struct ifnet *); +void vio_start(struct ifqueue *); int vio_ioctl(struct ifnet *, u_long, caddr_t); void vio_get_lladdr(struct arpcom *ac, struct virtio_softc *vsc); void 
vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc); /* rx */ -int vio_add_rx_mbuf(struct vio_softc *, int); -void vio_free_rx_mbuf(struct vio_softc *, int); -void vio_populate_rx_mbufs(struct vio_softc *); -int vio_rxeof(struct vio_softc *); +int vio_add_rx_mbuf(struct vio_softc *, struct vio_queue *, int); +void vio_free_rx_mbuf(struct vio_softc *, struct vio_queue *, int); +void vio_populate_rx_mbufs(struct vio_softc *, struct vio_queue *); +int vio_rxeof(struct vio_queue *); int vio_rx_intr(struct virtqueue *); void vio_rx_drain(struct vio_softc *); void vio_rxtick(void *); /* tx */ int vio_tx_intr(struct virtqueue *); +int vio_tx_dequeue(struct virtqueue *); int vio_txeof(struct virtqueue *); void vio_tx_drain(struct vio_softc *); -int vio_encap(struct vio_softc *, int, struct mbuf *); +int vio_encap(struct vio_queue *, int, struct mbuf *); void vio_txtick(void *); +int vio_queue_intr(void *); +int vio_config_intr(void *); +int vio_ctrl_intr(void *); + /* other control */ void vio_link_state(struct ifnet *); int vio_config_change(struct virtio_softc *); int vio_ctrl_rx(struct vio_softc *, int, int); +int vio_ctrl_mq(struct vio_softc *); int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); int vio_set_rx_filter(struct vio_softc *); void vio_iff(struct vio_softc *); @@ -381,6 +418,7 @@ void vio_free_dmamem(struct vio_softc *sc) { struct virtio_softc *vsc = sc->sc_virtio; + bus_dmamap_unload(vsc->sc_dmat, sc->sc_dma_map); bus_dmamem_unmap(vsc->sc_dmat, sc->sc_dma_kva, sc->sc_dma_size); bus_dmamem_free(vsc->sc_dmat, &sc->sc_dma_seg, 1); @@ -390,11 +428,13 @@ vio_free_dmamem(struct vio_softc *sc) /* allocate memory */ /* * dma memory is used for: - * sc_tx_hdrs[slot]: metadata array for frames to be sent (WRITE) + * tx_hdrs[slot]: metadata array for frames to be sent (WRITE) * sc_ctrl_cmd: command to be sent via ctrl vq (WRITE) * sc_ctrl_status: return value for a command via ctrl vq (READ) * sc_ctrl_rx: parameter for a VIRTIO_NET_CTRL_RX class 
command * (WRITE) + * sc_ctrl_mq_pairs: set number of rx/tx queue pairs (WRITE) + * sc_ctrl_guest_offloads: configure offload features (WRITE) * sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC * class command (WRITE) * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC @@ -407,35 +447,36 @@ vio_free_dmamem(struct vio_softc *sc) */ /* * dynamically allocated memory is used for: - * sc_rx_dmamaps[slot]: bus_dmamap_t array for received payload - * sc_tx_dmamaps[slot]: bus_dmamap_t array for sent payload - * sc_rx_mbufs[slot]: mbuf pointer array for received frames - * sc_tx_mbufs[slot]: mbuf pointer array for sent frames + * rx_dmamaps[slot]: bus_dmamap_t array for received payload + * tx_dmamaps[slot]: bus_dmamap_t array for sent payload + * rx_mbufs[slot]: mbuf pointer array for received frames + * tx_mbufs[slot]: mbuf pointer array for sent frames */ int vio_alloc_mem(struct vio_softc *sc) { - struct virtio_softc *vsc = sc->sc_virtio; - struct ifnet *ifp = &sc->sc_ac.ac_if; - int allocsize, r, i, txsize; - unsigned int offset = 0; - int rxqsize, txqsize; - caddr_t kva; + struct virtio_softc *vsc = sc->sc_virtio; + struct ifnet *ifp = &sc->sc_ac.ac_if; + size_t allocsize, rxqsize, txqsize, offset = 0; + bus_size_t txsize; + caddr_t kva; + int i, qidx, r; - rxqsize = vsc->sc_vqs[0].vq_num; - txqsize = vsc->sc_vqs[1].vq_num; + rxqsize = sc->sc_q[0].rx_vq->vq_num; + txqsize = sc->sc_q[0].tx_vq->vq_num; /* * For simplicity, we always allocate the full virtio_net_hdr size * even if VIRTIO_NET_F_MRG_RXBUF is not negotiated and * only a part of the memory is ever used. 
*/ - allocsize = sizeof(struct virtio_net_hdr) * txqsize; + allocsize = sizeof(struct virtio_net_hdr) * txqsize * sc->sc_nqueues; if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; allocsize += sizeof(struct virtio_net_ctrl_status) * 1; allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; + allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1; allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; allocsize += VIO_CTRL_MAC_INFO_SIZE; } @@ -447,8 +488,13 @@ vio_alloc_mem(struct vio_softc *sc) } kva = sc->sc_dma_kva; - sc->sc_tx_hdrs = (struct virtio_net_hdr*)(kva + offset); - offset += sizeof(struct virtio_net_hdr) * txqsize; + + for (qidx = 0; qidx < sc->sc_nqueues; qidx++) { + sc->sc_q[qidx].tx_hdrs = + (struct virtio_net_hdr*)(kva + offset); + offset += sizeof(struct virtio_net_hdr) * txqsize; + } + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { sc->sc_ctrl_cmd = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_cmd); @@ -456,63 +502,78 @@ vio_alloc_mem(struct vio_softc *sc) offset += sizeof(*sc->sc_ctrl_status); sc->sc_ctrl_rx = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_rx); + sc->sc_ctrl_mq_pairs = (void*)(kva + offset); + offset += sizeof(*sc->sc_ctrl_mq_pairs); sc->sc_ctrl_guest_offloads = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_guest_offloads); sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES; sc->sc_ctrl_mac_tbl_mc = (void*)(kva + offset); + offset += sizeof(*sc->sc_ctrl_mac_tbl_mc) + + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_MC_ENTRIES; } + KASSERT(offset == allocsize); - sc->sc_arrays = mallocarray(rxqsize + txqsize, - sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF, - M_WAITOK | M_CANFAIL | M_ZERO); - if (sc->sc_arrays == NULL) { - printf("unable to allocate mem for dmamaps\n"); - goto err_hdr; - } - allocsize = (rxqsize + txqsize) * - (sizeof(bus_dmamap_t) + 
sizeof(struct mbuf *)); + txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN; - sc->sc_tx_dmamaps = sc->sc_arrays + rxqsize; - sc->sc_rx_mbufs = (void*) (sc->sc_tx_dmamaps + txqsize); - sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize; + for (qidx = 0; qidx < sc->sc_nqueues; qidx++) { + struct vio_queue *vioq = &sc->sc_q[qidx]; - for (i = 0; i < rxqsize; i++) { - r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, - MAXMCLBYTES/PAGE_SIZE + 1, MCLBYTES, 0, - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); - if (r != 0) - goto err_reqs; - } + vioq->arrays = mallocarray(rxqsize + txqsize, + sizeof(bus_dmamap_t) + sizeof(struct mbuf *), M_DEVBUF, + M_WAITOK | M_CANFAIL | M_ZERO); + if (vioq->arrays == NULL) { + printf("unable to allocate mem for dmamaps\n"); + goto free; + } - txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN; - for (i = 0; i < txqsize; i++) { - r = bus_dmamap_create(vsc->sc_dmat, txsize, - VIRTIO_NET_TX_MAXNSEGS, txsize, 0, - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, - &sc->sc_tx_dmamaps[i]); - if (r != 0) - goto err_reqs; + vioq->tx_dmamaps = vioq->arrays + rxqsize; + vioq->rx_mbufs = (void*)(vioq->tx_dmamaps + txqsize); + vioq->tx_mbufs = vioq->rx_mbufs + rxqsize; + + for (i = 0; i < rxqsize; i++) { + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, + MAXMCLBYTES/PAGE_SIZE + 1, MCLBYTES, 0, + BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vioq->rx_dmamaps[i]); + if (r != 0) + goto destroy; + } + + for (i = 0; i < txqsize; i++) { + r = bus_dmamap_create(vsc->sc_dmat, txsize, + VIRTIO_NET_TX_MAXNSEGS, txsize, 0, + BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, + &vioq->tx_dmamaps[i]); + if (r != 0) + goto destroy; + } } return 0; -err_reqs: + destroy: printf("dmamap creation failed, error %d\n", r); - for (i = 0; i < txqsize; i++) { - if (sc->sc_tx_dmamaps[i]) - bus_dmamap_destroy(vsc->sc_dmat, sc->sc_tx_dmamaps[i]); - } - for (i = 0; i < rxqsize; i++) { - if (sc->sc_rx_dmamaps[i]) - bus_dmamap_destroy(vsc->sc_dmat, sc->sc_rx_dmamaps[i]); - } - if (sc->sc_arrays) { - 
free(sc->sc_arrays, M_DEVBUF, allocsize); - sc->sc_arrays = 0; + for (qidx = 0; qidx < sc->sc_nqueues; qidx++) { + struct vio_queue *vioq = &sc->sc_q[qidx]; + + for (i = 0; i < txqsize; i++) { + if (vioq->tx_dmamaps[i]) { + bus_dmamap_destroy(vsc->sc_dmat, + vioq->tx_dmamaps[i]); + } + } + for (i = 0; i < rxqsize; i++) { + if (vioq->rx_dmamaps[i]) { + bus_dmamap_destroy(vsc->sc_dmat, + vioq->rx_dmamaps[i]); + } + } + free(vioq->arrays, M_DEVBUF, (rxqsize + txqsize) * + (sizeof(bus_dmamap_t) + sizeof(struct mbuf *))); + vioq->arrays = NULL; } -err_hdr: + free: vio_free_dmamem(sc); return -1; } @@ -554,7 +615,8 @@ vio_attach(struct device *parent, struct device *self, void *aux) { struct vio_softc *sc = (struct vio_softc *)self; struct virtio_softc *vsc = (struct virtio_softc *)parent; - int i; + struct virtio_attach_args *va = aux; + int i, r; struct ifnet *ifp = &sc->sc_ac.ac_if; if (vsc->sc_child != NULL) { @@ -566,14 +628,16 @@ vio_attach(struct device *parent, struct device *self, void *aux) sc->sc_virtio = vsc; vsc->sc_child = self; - vsc->sc_ipl = IPL_NET; - vsc->sc_vqs = &sc->sc_vq[0]; + vsc->sc_ipl = IPL_NET | IPL_MPSAFE; vsc->sc_config_change = NULL; vsc->sc_driver_features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM | VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM; + if (va->va_nintr > 3) + vsc->sc_driver_features |= VIRTIO_NET_F_MQ; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; @@ -582,6 +646,37 @@ vio_attach(struct device *parent, struct device *self, void *aux) vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6; virtio_negotiate_features(vsc, virtio_net_feature_names); + + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + i = virtio_read_device_config_2(vsc, + VIRTIO_NET_CONFIG_MAX_QUEUES); + vsc->sc_nvqs = 2 * i + 1; + i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX); + sc->sc_intrmap = 
intrmap_create(&sc->sc_dev, i, + va->va_nintr - 2, 0); + sc->sc_nqueues = intrmap_count(sc->sc_intrmap); + printf(": %u queue%s", sc->sc_nqueues, + sc->sc_nqueues > 1 ? "s" : ""); + } else { + sc->sc_nqueues = 1; + printf(": 1 queue"); + vsc->sc_nvqs = 2; + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) + vsc->sc_nvqs++; + } + + vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (vsc->sc_vqs == NULL) { + vsc->sc_nvqs = 0; + goto err; + } + + sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->sc_q == NULL) + goto err; + if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { vio_get_lladdr(&sc->sc_ac, vsc); } else { @@ -601,37 +696,97 @@ vio_attach(struct device *parent, struct device *self, void *aux) else ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN; - if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, 2, "rx") != 0) - goto err; - vsc->sc_nvqs = 1; - sc->sc_vq[VQRX].vq_done = vio_rx_intr; - if (virtio_alloc_vq(vsc, &sc->sc_vq[VQTX], 1, - VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) { - goto err; - } - vsc->sc_nvqs = 2; - sc->sc_vq[VQTX].vq_done = vio_tx_intr; - virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]); - if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) - virtio_postpone_intr_far(&sc->sc_vq[VQTX]); + if (virtio_has_feature(vsc, VIRTIO_F_RING_INDIRECT_DESC)) + sc->sc_tx_slots_per_req = 1; else - virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]); + sc->sc_tx_slots_per_req = VIRTIO_NET_TX_MAXNSEGS + 1; + + for (i = 0; i < sc->sc_nqueues; i++) { + int vqidx = 2 * i; + struct vio_queue *vioq = &sc->sc_q[i]; + vioq->rx_vq = &vsc->sc_vqs[vqidx]; + mtx_init(&vioq->tx_mtx, IPL_NET); + mtx_init(&vioq->rx_mtx, IPL_NET); + vioq->sc = sc; + if (virtio_alloc_vq(vsc, vioq->rx_vq, vqidx, 2, "rx") != 0) + goto err; + vioq->rx_vq->vq_done = vio_rx_intr; + virtio_start_vq_intr(vsc, vioq->rx_vq); + + vqidx++; + vioq->tx_vq = &vsc->sc_vqs[vqidx]; + if (virtio_alloc_vq(vsc, vioq->tx_vq, 
vqidx, + VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) { + goto err; + } + vioq->tx_vq->vq_done = vio_tx_intr; + if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) + virtio_postpone_intr_far(vioq->tx_vq); + else + virtio_stop_vq_intr(vsc, vioq->tx_vq); + vioq->tx_free_slots = vioq->tx_vq->vq_num - 1; + KASSERT(vioq->tx_free_slots > sc->sc_tx_slots_per_req); + if (vioq->tx_vq->vq_num != sc->sc_q[0].tx_vq->vq_num) { + printf("inequal tx queue size %d: %d != %d\n", i, + vioq->tx_vq->vq_num, sc->sc_q[0].tx_vq->vq_num); + goto err; + } + DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->rx_vq, vioq->tx_vq); + + if (sc->sc_intrmap != NULL) { + vioq->rx_vq->vq_intr_vec = i + 2; + vioq->tx_vq->vq_intr_vec = i + 2; + } + } + + /* control queue */ if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { - if (virtio_alloc_vq(vsc, &sc->sc_vq[VQCTL], 2, 1, - "control") == 0) { - sc->sc_vq[VQCTL].vq_done = vio_ctrleof; - virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]); - vsc->sc_nvqs = 3; + i = 2; + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + i = 2 * virtio_read_device_config_2(vsc, + VIRTIO_NET_CONFIG_MAX_QUEUES); } + sc->sc_ctl_vq = &vsc->sc_vqs[i]; + if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, + "control") != 0) + goto err; + sc->sc_ctl_vq->vq_done = vio_ctrleof; + if (sc->sc_intrmap != NULL) + sc->sc_ctl_vq->vq_intr_vec = 1; + virtio_start_vq_intr(vsc, sc->sc_ctl_vq); } + if (sc->sc_intrmap) { + r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, vsc); + if (r != 0) { + printf("%s: cannot alloc config intr: %d\n", sc->sc_dev.dv_xname, r); + goto err; + } + r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, sc->sc_ctl_vq); + if (r != 0) { + printf("%s: cannot alloc ctrl intr: %d\n", sc->sc_dev.dv_xname, r); + goto err; + } + for (i = 0; i < sc->sc_nqueues; i++) { + struct cpu_info *ci = NULL; + ci = intrmap_cpu(sc->sc_intrmap, i); + r = virtio_intr_establish(vsc, va, i + 2, ci, vio_queue_intr, &sc->sc_q[i]); + if (r != 0) { + printf("%s: cannot alloc q%d intr: 
%d\n", sc->sc_dev.dv_xname, i, r); + goto err; + } + } + } + + if (vio_alloc_mem(sc) < 0) goto err; strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ); ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_start = vio_start; + ifp->if_xflags = IFXF_MPSAFE; + ifp->if_qstart = vio_start; ifp->if_ioctl = vio_ioctl; ifp->if_capabilities = 0; #if NVLAN > 0 @@ -658,18 +813,36 @@ vio_attach(struct device *parent, struct device *self, void *aux) ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); vsc->sc_config_change = vio_config_change; - timeout_set(&sc->sc_txtick, vio_txtick, &sc->sc_vq[VQTX]); - timeout_set(&sc->sc_rxtick, vio_rxtick, &sc->sc_vq[VQRX]); + timeout_set(&sc->sc_txtick, vio_txtick, sc); + timeout_set(&sc->sc_rxtick, vio_rxtick, sc); virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK); + + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) { + /* ctrl queue works only after DRIVER_OK */ + vio_ctrl_mq(sc); + } + if_attach(ifp); ether_ifattach(ifp); + vio_link_state(ifp); + + if_attach_queues(ifp, sc->sc_nqueues); + if_attach_iqueues(ifp, sc->sc_nqueues); + + for (i = 0; i < sc->sc_nqueues; i++) { + ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i]; + sc->sc_q[i].ifq = ifp->if_ifqs[i]; + sc->sc_q[i].ifiq = ifp->if_iqs[i]; + } return; err: for (i = 0; i < vsc->sc_nvqs; i++) - virtio_free_vq(vsc, &sc->sc_vq[i]); + virtio_free_vq(vsc, &vsc->sc_vqs[i]); + free(vsc->sc_vqs, M_DEVBUF, vsc->sc_nvqs * sizeof(*vsc->sc_vqs)); + free(sc->sc_q, M_DEVBUF, sc->sc_nqueues * sizeof(*sc->sc_q)); vsc->sc_nvqs = 0; vsc->sc_child = VIRTIO_CHILD_ERROR; return; @@ -695,12 +868,41 @@ vio_link_state(struct ifnet *ifp) } } +/* interrupt handlers for multi-queue */ +int +vio_queue_intr(void *arg) +{ + struct vio_queue *vioq = arg; + struct virtio_softc *vsc = vioq->sc->sc_virtio; + int r; + r = virtio_check_vq(vsc, vioq->tx_vq); + r |= virtio_check_vq(vsc, vioq->rx_vq); + return r; +} + +int 
+vio_config_intr(void *arg) +{ + struct virtio_softc *vsc = arg; + return vio_config_change(vsc); +} + +int +vio_ctrl_intr(void *arg) +{ + struct virtqueue *vq = arg; + return virtio_check_vq(vq->vq_owner, vq); +} + + int vio_config_change(struct virtio_softc *vsc) { struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + KERNEL_LOCK(); vio_link_state(&sc->sc_ac.ac_if); vio_needs_reset(sc); + KERNEL_UNLOCK(); return 1; } @@ -730,13 +932,16 @@ vio_init(struct ifnet *ifp) { struct vio_softc *sc = ifp->if_softc; struct virtio_softc *vsc = sc->sc_virtio; + int qidx; vio_stop(ifp, 0); - if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), - sc->sc_vq[VQRX].vq_num); - vio_populate_rx_mbufs(sc); - ifp->if_flags |= IFF_RUNNING; - ifq_clr_oactive(&ifp->if_snd); + for (qidx = 0; qidx < sc->sc_nqueues; qidx++) { + struct vio_queue *vioq = &sc->sc_q[qidx]; + if_rxr_init(&vioq->rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), + vioq->rx_vq->vq_num); + vio_populate_rx_mbufs(sc, vioq); + ifq_clr_oactive(vioq->ifq); + } vio_iff(sc); vio_link_state(ifp); @@ -756,6 +961,8 @@ vio_init(struct ifnet *ifp) vio_ctrl_guest_offloads(sc, features); } + SET(ifp->if_flags, IFF_RUNNING); + return 0; } @@ -764,14 +971,19 @@ vio_stop(struct ifnet *ifp, int disable) { struct vio_softc *sc = ifp->if_softc; struct virtio_softc *vsc = sc->sc_virtio; + int i; + CLR(ifp->if_flags, IFF_RUNNING); timeout_del(&sc->sc_txtick); timeout_del(&sc->sc_rxtick); - ifp->if_flags &= ~IFF_RUNNING; - ifq_clr_oactive(&ifp->if_snd); /* only way to stop I/O and DMA is resetting... 
*/ virtio_reset(vsc); - vio_rxeof(sc); + for (i = 0; i < sc->sc_nqueues; i++) { + mtx_enter(&sc->sc_q[i].rx_mtx); + vio_rxeof(&sc->sc_q[i]); + mtx_leave(&sc->sc_q[i].rx_mtx); + } + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) vio_ctrl_wakeup(sc, RESET); vio_tx_drain(sc); @@ -779,11 +991,15 @@ vio_stop(struct ifnet *ifp, int disable) vio_rx_drain(sc); virtio_reinit_start(vsc); - virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]); - virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]); + for (i = 0; i < sc->sc_nqueues; i++) { + virtio_start_vq_intr(vsc, sc->sc_q[i].rx_vq); + virtio_stop_vq_intr(vsc, sc->sc_q[i].tx_vq); + } if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) - virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]); + virtio_start_vq_intr(vsc, sc->sc_ctl_vq); virtio_reinit_end(vsc); + if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) + vio_ctrl_mq(sc); if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) vio_ctrl_wakeup(sc, FREE); } @@ -868,70 +1084,82 @@ vio_tx_offload(struct virtio_net_hdr *hdr, struct mbuf *m) } void -vio_start(struct ifnet *ifp) +vio_start(struct ifqueue *ifq) { + struct ifnet *ifp = ifq->ifq_if; + struct vio_queue *vioq = ifq->ifq_softc; struct vio_softc *sc = ifp->if_softc; struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQTX]; + struct virtqueue *vq = vioq->tx_vq; struct mbuf *m; - int queued = 0; + int queued = 0, free_slots, used_slots; - vio_txeof(vq); + mtx_enter(&vioq->tx_mtx); + vio_tx_dequeue(vq); - if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd)) - return; - if (ifq_empty(&ifp->if_snd)) - return; again: + free_slots = vioq->tx_free_slots; + KASSERT(free_slots >= 0); + used_slots = 0; for (;;) { int slot, r; struct virtio_net_hdr *hdr; - m = ifq_deq_begin(&ifp->if_snd); + if (free_slots - used_slots < sc->sc_tx_slots_per_req) { + ifq_set_oactive(ifq); + break; + } + + m = ifq_dequeue(ifq); if (m == NULL) break; r = virtio_enqueue_prep(vq, &slot); if (r == EAGAIN) { - ifq_deq_rollback(&ifp->if_snd, m); 
- ifq_set_oactive(&ifp->if_snd); + printf("%s: virtio_enqueue_prep failed?\n", __func__); + m_freem(m); + ifq->ifq_errors++; break; } if (r != 0) panic("%s: enqueue_prep for tx buffer: %d", sc->sc_dev.dv_xname, r); - hdr = &sc->sc_tx_hdrs[slot]; + hdr = &vioq->tx_hdrs[slot]; memset(hdr, 0, sc->sc_hdr_size); vio_tx_offload(hdr, m); - r = vio_encap(sc, slot, m); + r = vio_encap(vioq, slot, m); if (r != 0) { virtio_enqueue_abort(vq, slot); - ifq_deq_commit(&ifp->if_snd, m); m_freem(m); - ifp->if_oerrors++; + ifq->ifq_errors++; continue; } r = virtio_enqueue_reserve(vq, slot, - sc->sc_tx_dmamaps[slot]->dm_nsegs + 1); + vioq->tx_dmamaps[slot]->dm_nsegs + 1); if (r != 0) { + printf("%s: virtio_enqueue_reserve failed?\n", __func__); + m_freem(m); + ifq->ifq_errors++; bus_dmamap_unload(vsc->sc_dmat, - sc->sc_tx_dmamaps[slot]); - ifq_deq_rollback(&ifp->if_snd, m); - sc->sc_tx_mbufs[slot] = NULL; - ifq_set_oactive(&ifp->if_snd); + vioq->tx_dmamaps[slot]); + vioq->tx_mbufs[slot] = NULL; break; } - ifq_deq_commit(&ifp->if_snd, m); + if (sc->sc_tx_slots_per_req == 1) + used_slots++; + else + used_slots += vioq->tx_dmamaps[slot]->dm_nsegs + 1; + - bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0, - sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE); + bus_dmamap_sync(vsc->sc_dmat, vioq->tx_dmamaps[slot], 0, + vioq->tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREWRITE); VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size, BUS_DMASYNC_PREWRITE); VIO_DMAMEM_ENQUEUE(sc, vq, slot, hdr, sc->sc_hdr_size, 1); - virtio_enqueue(vq, slot, sc->sc_tx_dmamaps[slot], 1); + virtio_enqueue(vq, slot, vioq->tx_dmamaps[slot], 1); virtio_enqueue_commit(vsc, vq, slot, 0); queued++; #if NBPFILTER > 0 @@ -939,14 +1167,21 @@ again: bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif } - if (ifq_is_oactive(&ifp->if_snd)) { + if (used_slots > 0) { + if (used_slots > vioq->tx_free_slots) + printf("%s: used_slots %d tx_free_slots %d free_slots %d\n", + __func__, used_slots, vioq->tx_free_slots, 
free_slots); + vioq->tx_free_slots -= used_slots; + KASSERT(vioq->tx_free_slots >= 0); + } + if (ifq_is_oactive(ifq)) { int r; if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) - r = virtio_postpone_intr_smart(&sc->sc_vq[VQTX]); + r = virtio_postpone_intr_smart(vq); else - r = virtio_start_vq_intr(vsc, &sc->sc_vq[VQTX]); + r = virtio_start_vq_intr(vsc, vq); if (r) { - vio_txeof(vq); + vio_tx_dequeue(vq); goto again; } } @@ -955,6 +1190,7 @@ again: virtio_notify(vsc, vq); timeout_add_sec(&sc->sc_txtick, 1); } + mtx_leave(&vioq->tx_mtx); } #if VIRTIO_DEBUG @@ -963,22 +1199,54 @@ vio_dump(struct vio_softc *sc) { struct ifnet *ifp = &sc->sc_ac.ac_if; struct virtio_softc *vsc = sc->sc_virtio; + int i; printf("%s status dump:\n", ifp->if_xname); - printf("TX virtqueue:\n"); - virtio_vq_dump(&vsc->sc_vqs[VQTX]); printf("tx tick active: %d\n", !timeout_triggered(&sc->sc_txtick)); + printf("max tx slots per req %d\n", sc->sc_tx_slots_per_req); printf("rx tick active: %d\n", !timeout_triggered(&sc->sc_rxtick)); - printf("RX virtqueue:\n"); - virtio_vq_dump(&vsc->sc_vqs[VQRX]); + for (i = 0; i < sc->sc_nqueues; i++) { + printf("%d: TX virtqueue:\n", i); + printf(" tx free slots %d\n", sc->sc_q[i].tx_free_slots); + virtio_vq_dump(sc->sc_q[i].tx_vq); + printf("%d: RX virtqueue:\n", i); + virtio_vq_dump(sc->sc_q[i].rx_vq); + } if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) { printf("CTL virtqueue:\n"); - virtio_vq_dump(&vsc->sc_vqs[VQCTL]); + virtio_vq_dump(sc->sc_ctl_vq); printf("ctrl_inuse: %d\n", sc->sc_ctrl_inuse); } } #endif +static int +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri) +{ + struct if_rxring_info *ifrs, *ifr; + int error; + unsigned int i; + + ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs), + M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL); + if (ifrs == NULL) + return (ENOMEM); + + for (i = 0; i < sc->sc_nqueues; i++) { + ifr = &ifrs[i]; + + ifr->ifr_size = MCLBYTES; + snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i); + ifr->ifr_info = 
sc->sc_q[i].rx_ring; + } + + error = if_rxr_info_ioctl(ifri, i, ifrs); + + free(ifrs, M_TEMP, i * sizeof(*ifrs)); + + return (error); +} + int vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { @@ -1013,8 +1281,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; case SIOCGIFRXR: - r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data, - NULL, MCLBYTES, &sc->sc_rx_ring); + r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data); break; default: r = ether_ioctl(ifp, &sc->sc_ac, cmd, data); @@ -1034,7 +1301,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) */ /* allocate and initialize a mbuf for receive */ int -vio_add_rx_mbuf(struct vio_softc *sc, int i) +vio_add_rx_mbuf(struct vio_softc *sc, struct vio_queue *vioq, int i) { struct mbuf *m; int r; @@ -1042,13 +1309,14 @@ vio_add_rx_mbuf(struct vio_softc *sc, int i) m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES); if (m == NULL) return ENOBUFS; - sc->sc_rx_mbufs[i] = m; + // XXX m_adj ETHER_ALIGN ? 
+ vioq->rx_mbufs[i] = m; m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; - r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i], - m, BUS_DMA_READ|BUS_DMA_NOWAIT); + r = bus_dmamap_load_mbuf(sc->sc_virtio->sc_dmat, + vioq->rx_dmamaps[i], m, BUS_DMA_READ|BUS_DMA_NOWAIT); if (r) { m_freem(m); - sc->sc_rx_mbufs[i] = NULL; + vioq->rx_mbufs[i] = NULL; return r; } @@ -1057,24 +1325,24 @@ vio_add_rx_mbuf(struct vio_softc *sc, int i) /* free a mbuf for receive */ void -vio_free_rx_mbuf(struct vio_softc *sc, int i) +vio_free_rx_mbuf(struct vio_softc *sc, struct vio_queue *vioq, int i) { - bus_dmamap_unload(sc->sc_virtio->sc_dmat, sc->sc_rx_dmamaps[i]); - m_freem(sc->sc_rx_mbufs[i]); - sc->sc_rx_mbufs[i] = NULL; + bus_dmamap_unload(sc->sc_virtio->sc_dmat, vioq->rx_dmamaps[i]); + m_freem(vioq->rx_mbufs[i]); + vioq->rx_mbufs[i] = NULL; } /* add mbufs for all the empty receive slots */ void -vio_populate_rx_mbufs(struct vio_softc *sc) +vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq) { struct virtio_softc *vsc = sc->sc_virtio; int r, done = 0; u_int slots; - struct virtqueue *vq = &sc->sc_vq[VQRX]; + struct virtqueue *vq = vioq->rx_vq; int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc); - for (slots = if_rxr_get(&sc->sc_rx_ring, vq->vq_num); + for (slots = if_rxr_get(&vioq->rx_ring, vq->vq_num); slots > 0; slots--) { int slot; r = virtio_enqueue_prep(vq, &slot); @@ -1083,38 +1351,38 @@ vio_populate_rx_mbufs(struct vio_softc *sc) if (r != 0) panic("%s: enqueue_prep for rx buffer: %d", sc->sc_dev.dv_xname, r); - if (sc->sc_rx_mbufs[slot] == NULL) { - r = vio_add_rx_mbuf(sc, slot); + if (vioq->rx_mbufs[slot] == NULL) { + r = vio_add_rx_mbuf(sc, vioq, slot); if (r != 0) { virtio_enqueue_abort(vq, slot); break; } } r = virtio_enqueue_reserve(vq, slot, - sc->sc_rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1)); + vioq->rx_dmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 
0 : 1)); if (r != 0) { - vio_free_rx_mbuf(sc, slot); + vio_free_rx_mbuf(sc, vioq, slot); break; } - bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0, - sc->sc_rx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREREAD); + bus_dmamap_sync(vsc->sc_dmat, vioq->rx_dmamaps[slot], 0, + vioq->rx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_PREREAD); if (mrg_rxbuf) { - virtio_enqueue(vq, slot, sc->sc_rx_dmamaps[slot], 0); + virtio_enqueue(vq, slot, vioq->rx_dmamaps[slot], 0); } else { /* * Buggy kvm wants a buffer of exactly the size of * the header in this case, so we have to split in * two. */ - virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot], + virtio_enqueue_p(vq, slot, vioq->rx_dmamaps[slot], 0, sc->sc_hdr_size, 0); - virtio_enqueue_p(vq, slot, sc->sc_rx_dmamaps[slot], + virtio_enqueue_p(vq, slot, vioq->rx_dmamaps[slot], sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0); } virtio_enqueue_commit(vsc, vq, slot, 0); done = 1; } - if_rxr_put(&sc->sc_rx_ring, slots); + if_rxr_put(&vioq->rx_ring, slots); if (done) virtio_notify(vsc, vq); @@ -1163,10 +1431,10 @@ vio_rx_offload(struct mbuf *m, struct virtio_net_hdr *hdr) /* dequeue received packets */ int -vio_rxeof(struct vio_softc *sc) +vio_rxeof(struct vio_queue *vioq) { + struct vio_softc *sc = vioq->sc; struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQRX]; struct ifnet *ifp = &sc->sc_ac.ac_if; struct mbuf_list ml = MBUF_LIST_INITIALIZER(); struct mbuf *m, *m0 = NULL, *mlast; @@ -1174,16 +1442,17 @@ vio_rxeof(struct vio_softc *sc) int slot, len, bufs_left; struct virtio_net_hdr *hdr; - while (virtio_dequeue(vsc, vq, &slot, &len) == 0) { + MUTEX_ASSERT_LOCKED(&vioq->rx_mtx); + while (virtio_dequeue(vsc, vioq->rx_vq, &slot, &len) == 0) { r = 1; - bus_dmamap_sync(vsc->sc_dmat, sc->sc_rx_dmamaps[slot], 0, - sc->sc_rx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_POSTREAD); - m = sc->sc_rx_mbufs[slot]; + bus_dmamap_sync(vsc->sc_dmat, vioq->rx_dmamaps[slot], 0, + vioq->rx_dmamaps[slot]->dm_mapsize, 
BUS_DMASYNC_POSTREAD); + m = vioq->rx_mbufs[slot]; KASSERT(m != NULL); - bus_dmamap_unload(vsc->sc_dmat, sc->sc_rx_dmamaps[slot]); - sc->sc_rx_mbufs[slot] = NULL; - virtio_dequeue_commit(vq, slot); - if_rxr_put(&sc->sc_rx_ring, 1); + bus_dmamap_unload(vsc->sc_dmat, vioq->rx_dmamaps[slot]); + vioq->rx_mbufs[slot] = NULL; + virtio_dequeue_commit(vioq->rx_vq, slot); + if_rxr_put(&vioq->rx_ring, 1); m->m_len = m->m_pkthdr.len = len; m->m_pkthdr.csum_flags = 0; if (m0 == NULL) { @@ -1216,8 +1485,8 @@ vio_rxeof(struct vio_softc *sc) m_freem(m0); } - if (ifiq_input(&ifp->if_rcv, &ml)) - if_rxr_livelocked(&sc->sc_rx_ring); + if (ifiq_input(vioq->ifiq, &ml)) + if_rxr_livelocked(&vioq->rx_ring); return r; } @@ -1227,13 +1496,15 @@ vio_rx_intr(struct virtqueue *vq) { struct virtio_softc *vsc = vq->vq_owner; struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2]; int r, sum = 0; + mtx_enter(&vioq->rx_mtx); again: - r = vio_rxeof(sc); + r = vio_rxeof(vioq); sum += r; if (r) { - vio_populate_rx_mbufs(sc); + vio_populate_rx_mbufs(sc, vioq); /* set used event index to the next slot */ if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX)) { if (virtio_start_vq_intr(vq->vq_owner, vq)) @@ -1241,33 +1512,37 @@ again: } } + mtx_leave(&vioq->rx_mtx); return sum; } void vio_rxtick(void *arg) { - struct virtqueue *vq = arg; - struct virtio_softc *vsc = vq->vq_owner; - struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; - int s; + struct vio_softc *sc = arg; + int i; - s = splnet(); - vio_populate_rx_mbufs(sc); - splx(s); + for (i = 0; i < sc->sc_nqueues; i++) { + mtx_enter(&sc->sc_q[i].rx_mtx); + vio_populate_rx_mbufs(sc, &sc->sc_q[i]); + mtx_leave(&sc->sc_q[i].rx_mtx); + } } /* free all the mbufs; called from if_stop(disable) */ void vio_rx_drain(struct vio_softc *sc) { - struct virtqueue *vq = &sc->sc_vq[VQRX]; - int i; - - for (i = 0; i < vq->vq_num; i++) { - if (sc->sc_rx_mbufs[i] == NULL) - continue; - vio_free_rx_mbuf(sc, 
i); + struct vio_queue *vioq; + int i, qidx; + + for (qidx = 0; qidx < sc->sc_nqueues; qidx++) { + vioq = &sc->sc_q[qidx]; + for (i = 0; i < vioq->rx_vq->vq_num; i++) { + if (vioq->rx_mbufs[i] == NULL) + continue; + vio_free_rx_mbuf(sc, vioq, i); + } } } @@ -1286,53 +1561,77 @@ vio_tx_intr(struct virtqueue *vq) { struct virtio_softc *vsc = vq->vq_owner; struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; - struct ifnet *ifp = &sc->sc_ac.ac_if; + struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2]; int r; r = vio_txeof(vq); - vio_start(ifp); + vio_start(vioq->ifq); return r; } void vio_txtick(void *arg) { - struct virtqueue *vq = arg; - int s = splnet(); - virtio_check_vq(vq->vq_owner, vq); - splx(s); + struct vio_softc *sc = arg; + int i; + + for (i = 0; i < sc->sc_nqueues; i++) + virtio_check_vq(sc->sc_virtio, sc->sc_q[i].tx_vq); } int -vio_txeof(struct virtqueue *vq) +vio_tx_dequeue(struct virtqueue *vq) { struct virtio_softc *vsc = vq->vq_owner; struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2]; struct ifnet *ifp = &sc->sc_ac.ac_if; struct mbuf *m; int r = 0; - int slot, len; + int slot, len, freed = 0; + MUTEX_ASSERT_LOCKED(&vioq->tx_mtx); if (!ISSET(ifp->if_flags, IFF_RUNNING)) return 0; while (virtio_dequeue(vsc, vq, &slot, &len) == 0) { - struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot]; + struct virtio_net_hdr *hdr = &vioq->tx_hdrs[slot]; r++; VIO_DMAMEM_SYNC(vsc, sc, hdr, sc->sc_hdr_size, BUS_DMASYNC_POSTWRITE); - bus_dmamap_sync(vsc->sc_dmat, sc->sc_tx_dmamaps[slot], 0, - sc->sc_tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_POSTWRITE); - m = sc->sc_tx_mbufs[slot]; - bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[slot]); - sc->sc_tx_mbufs[slot] = NULL; - virtio_dequeue_commit(vq, slot); + bus_dmamap_sync(vsc->sc_dmat, vioq->tx_dmamaps[slot], 0, + vioq->tx_dmamaps[slot]->dm_mapsize, BUS_DMASYNC_POSTWRITE); + m = vioq->tx_mbufs[slot]; + bus_dmamap_unload(vsc->sc_dmat, vioq->tx_dmamaps[slot]); 
+ vioq->tx_mbufs[slot] = NULL; + freed += virtio_dequeue_commit(vq, slot); m_freem(m); } + KASSERT(vioq->tx_free_slots >= 0); + vioq->tx_free_slots += freed; + return r; +} + + +int +vio_txeof(struct virtqueue *vq) +{ + struct virtio_softc *vsc = vq->vq_owner; + struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; + struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2]; + int r; + + mtx_enter(&vioq->tx_mtx); + r = vio_tx_dequeue(vq); + mtx_leave(&vioq->tx_mtx); if (r) { - ifq_clr_oactive(&ifp->if_snd); - virtio_stop_vq_intr(vsc, &sc->sc_vq[VQTX]); + if (ifq_is_oactive(vioq->ifq)) { + mtx_enter(&vioq->tx_mtx); + virtio_stop_vq_intr(vsc, vq); + mtx_leave(&vioq->tx_mtx); + ifq_restart(vioq->ifq); + } } if (vq->vq_used_idx == vq->vq_avail_idx) timeout_del(&sc->sc_txtick); @@ -1342,10 +1641,10 @@ vio_txeof(struct virtqueue *vq) } int -vio_encap(struct vio_softc *sc, int slot, struct mbuf *m) +vio_encap(struct vio_queue *vioq, int slot, struct mbuf *m) { - struct virtio_softc *vsc = sc->sc_virtio; - bus_dmamap_t dmap= sc->sc_tx_dmamaps[slot]; + struct virtio_softc *vsc = vioq->sc->sc_virtio; + bus_dmamap_t dmap = vioq->tx_dmamaps[slot]; int r; r = bus_dmamap_load_mbuf(vsc->sc_dmat, dmap, m, @@ -1363,7 +1662,7 @@ vio_encap(struct vio_softc *sc, int slot, struct mbuf *m) default: return ENOBUFS; } - sc->sc_tx_mbufs[slot] = m; + vioq->tx_mbufs[slot] = m; return 0; } @@ -1372,15 +1671,22 @@ void vio_tx_drain(struct vio_softc *sc) { struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQTX]; - int i; - - for (i = 0; i < vq->vq_num; i++) { - if (sc->sc_tx_mbufs[i] == NULL) - continue; - bus_dmamap_unload(vsc->sc_dmat, sc->sc_tx_dmamaps[i]); - m_freem(sc->sc_tx_mbufs[i]); - sc->sc_tx_mbufs[i] = NULL; + struct vio_queue *vioq; + int i, q; + + for (q = 0; q < sc->sc_nqueues; q++) { + vioq = &sc->sc_q[q]; + mtx_enter(&vioq->tx_mtx); + for (i = 0; i < vioq->tx_vq->vq_num; i++) { + if (vioq->tx_mbufs[i] == NULL) + continue; + 
bus_dmamap_unload(vsc->sc_dmat, vioq->tx_dmamaps[i]); + m_freem(vioq->tx_mbufs[i]); + vioq->tx_mbufs[i] = NULL; + } + ifq_purge(vioq->ifq); + ifq_clr_oactive(vioq->ifq); + mtx_leave(&vioq->tx_mtx); } } @@ -1392,7 +1698,7 @@ int vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) { struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQCTL]; + struct virtqueue *vq = sc->sc_ctl_vq; int r, slot; splassert(IPL_NET); @@ -1450,11 +1756,76 @@ out: return r; } +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */ +int +vio_ctrl_mq(struct vio_softc *sc) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = sc->sc_ctl_vq; + int r, slot; + + splassert(IPL_NET); + + if ((r = vio_wait_ctrl(sc)) != 0) + return r; + + sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MQ; + sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs, + sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD); + + r = virtio_enqueue_prep(vq, &slot); + if (r != 0) + panic("%s: %s virtio_enqueue_prep: control vq busy", + sc->sc_dev.dv_xname, __func__); + r = virtio_enqueue_reserve(vq, slot, 3); + if (r != 0) + panic("%s: %s virtio_enqueue_reserve: control vq busy", + sc->sc_dev.dv_xname, __func__); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_mq_pairs, + sizeof(*sc->sc_ctrl_mq_pairs), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), 0); + virtio_enqueue_commit(vsc, vq, slot, 1); + + if ((r = vio_wait_ctrl_done(sc)) != 0) + goto out; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE); 
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs, + sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD); + + if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) { + r = 0; + } else { + printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET); + r = EIO; + } + + DPRINTF("%s: cmd %d %d: %d\n", __func__, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r); +out: + vio_ctrl_wakeup(sc, FREE); + return r; +} + int vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) { struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQCTL]; + struct virtqueue *vq = sc->sc_ctl_vq; int r, slot; splassert(IPL_NET); @@ -1543,6 +1914,8 @@ vio_wait_ctrl_done(struct vio_softc *sc) vio_ctrl_wakeup(sc, RESET); return ENXIO; } + if (cold) + virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq); } return r; } @@ -1559,18 +1932,23 @@ vio_ctrleof(struct virtqueue *vq) { struct virtio_softc *vsc = vq->vq_owner; struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; - int r = 0, ret, slot; + int r = 0, ret, slot, s; + KERNEL_LOCK(); + s = splnet(); again: ret = virtio_dequeue(vsc, vq, &slot, NULL); if (ret == ENOENT) - return r; + goto out; virtio_dequeue_commit(vq, slot); r++; vio_ctrl_wakeup(sc, DONE); if (virtio_start_vq_intr(vsc, vq)) goto again; +out: + splx(s); + KERNEL_UNLOCK(); return r; } @@ -1580,7 +1958,7 @@ vio_set_rx_filter(struct vio_softc *sc) { /* filter already set in sc_ctrl_mac_tbl */ struct virtio_softc *vsc = sc->sc_virtio; - struct virtqueue *vq = &sc->sc_vq[VQCTL]; + struct virtqueue *vq = sc->sc_ctl_vq; int r, slot; splassert(IPL_NET); diff --git a/sys/dev/pv/virtio.c b/sys/dev/pv/virtio.c index 394546ecf7f..549fa29c1cf 100644 --- a/sys/dev/pv/virtio.c +++ b/sys/dev/pv/virtio.c @@ -165,9 +165,9 @@ virtio_reinit_start(struct virtio_softc *sc) for (i = 0; i < sc->sc_nvqs; i++) { int n; struct virtqueue *vq = 
&sc->sc_vqs[i]; - n = virtio_read_queue_size(sc, vq->vq_index); - if (n == 0) /* vq disappeared */ + if (vq->vq_num == 0) /* not used */ continue; + n = virtio_read_queue_size(sc, vq->vq_index); if (n != vq->vq_num) { panic("%s: virtqueue size changed, vq index %d", sc->sc_dev.dv_xname, vq->vq_index); @@ -175,6 +175,7 @@ virtio_reinit_start(struct virtio_softc *sc) virtio_init_vq(sc, vq); virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr); } + sc->sc_ops->setup_intrs(sc); } void @@ -254,8 +255,11 @@ virtio_check_vqs(struct virtio_softc *sc) int i, r = 0; /* going backwards is better for if_vio */ - for (i = sc->sc_nvqs - 1; i >= 0; i--) + for (i = sc->sc_nvqs - 1; i >= 0; i--) { + if (sc->sc_vqs[i].vq_num == 0) /* not used */ + continue; r |= virtio_check_vq(sc, &sc->sc_vqs[i]); + } return r; } @@ -449,6 +453,11 @@ virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq) struct vq_entry *qe; int i = 0; + if (vq->vq_num == 0) { + /* virtio_alloc_vq() was never called */ + return 0; + } + /* device must be already deactivated */ /* confirm the vq is empty */ SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) { @@ -847,22 +856,25 @@ virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq, * * Don't call this if you use statically allocated slots * and virtio_enqueue_trim(). + * + * returns the number of freed slots. 
*/ int virtio_dequeue_commit(struct virtqueue *vq, int slot) { struct vq_entry *qe = &vq->vq_entries[slot]; struct vring_desc *vd = &vq->vq_desc[0]; - int s = slot; + int s = slot, r = 1; while (vd[s].flags & VRING_DESC_F_NEXT) { s = vd[s].next; vq_free_entry(vq, qe); qe = &vq->vq_entries[s]; + r++; } vq_free_entry(vq, qe); - return 0; + return r; } /* diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h index 367e166f2bb..ce6a835a939 100644 --- a/sys/dev/pv/virtiovar.h +++ b/sys/dev/pv/virtiovar.h @@ -103,7 +103,8 @@ struct vq_entry { struct virtqueue { struct virtio_softc *vq_owner; - unsigned int vq_num; /* queue size (# of entries) */ + unsigned int vq_num; /* queue size (# of entries), + * 0 if unused/non-existent */ unsigned int vq_mask; /* (1 << vq_num - 1) */ int vq_index; /* queue number (0, 1, ...) */ @@ -137,6 +138,7 @@ struct virtqueue { int (*vq_done)(struct virtqueue*); /* 1.x only: offset for notify address calculation */ uint32_t vq_notify_off; + int vq_intr_vec; }; struct virtio_feature_name { @@ -156,10 +158,13 @@ struct virtio_ops { void (*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t); uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t); void (*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t); + void (*setup_intrs)(struct virtio_softc *); int (*get_status)(struct virtio_softc *); void (*set_status)(struct virtio_softc *, int); int (*neg_features)(struct virtio_softc *, const struct virtio_feature_name *); int (*poll_intr)(void *); + int (*intr_establish)(struct virtio_softc *, struct virtio_attach_args *, + int, struct cpu_info *, int (*)(void *), void *); }; #define VIRTIO_CHILD_ERROR ((void*)1) @@ -176,7 +181,7 @@ struct virtio_softc { int sc_indirect; int sc_version_1; - int sc_nvqs; /* set by child */ + int sc_nvqs; /* size of sc_vqs, set by child */ struct virtqueue *sc_vqs; /* set by child */ struct device *sc_child; /* set by child, @@ -202,6 +207,14 @@ struct virtio_softc { #define
virtio_get_status(sc) (sc)->sc_ops->get_status(sc) #define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i) +/* + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a + * child driver, the transport driver will skip automatic intr allocation and + * the child driver must allocate all required interrupts itself. Vector 0 is + * always used for the config change interrupt. + */ +#define virtio_intr_establish(sc, va, v, ci, fn, a) (sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a) + /* only for transport drivers */ #define virtio_device_reset(sc) virtio_set_status((sc), 0)