From: Stefan Fritsch <sf@openbsd.org>
Subject: Re: vio(4) multi-queue V6
To: tech@openbsd.org
Date: Fri, 6 Sep 2024 10:04:31 +0200

    Hi,
    
    As noticed by Mark Patruck and Hrvoje Popovski, diff V5 caused problems
    with LRO. The new diff V6 below fixes that. It also contains fixes for
    races that could occur when the interface is brought down with ifconfig
    while packets are still being processed.
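
    For anyone skimming the diff: with multi-queue negotiated, the MSI-X
    vectors are now laid out as vector 0 for config changes, vector 1 for
    the control queue, and vectors 2..n+1 for the n rx/tx queue pairs, each
    pinned to a CPU chosen from the intrmap. Condensed from vio_attach()
    below, with error handling omitted:

    	virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, vsc);
    	virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, sc->sc_ctl_vq);
    	for (i = 0; i < sc->sc_nqueues; i++) {
    		struct cpu_info *ci = intrmap_cpu(sc->sc_intrmap, i);
    		virtio_intr_establish(vsc, va, i + 2, ci, vio_queue_intr,
    		    &sc->sc_q[i]);
    	}

    On the tx side, each queue now tracks its free descriptor slots under
    viq_txmtx: vio_start() budgets sc_tx_slots_per_req slots per packet
    (1 with indirect descriptors, tx_max_segments + 1 without) and sets the
    ifq oactive before the ring can overflow, and vio_tx_dequeue() returns
    the freed slots.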
    
    Cheers,
    Stefan
    
    
    diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
    index 604ffcab570..5a22c7f4823 100644
    --- a/sys/dev/fdt/virtio_mmio.c
    +++ b/sys/dev/fdt/virtio_mmio.c
    @@ -103,6 +103,9 @@ void		virtio_mmio_set_status(struct virtio_softc *, int);
     int		virtio_mmio_negotiate_features(struct virtio_softc *,
         const struct virtio_feature_name *);
     int		virtio_mmio_intr(void *);
    +int             virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *,
    +    int, struct cpu_info *, int (*)(void *), void *);
    +
     
     struct virtio_mmio_softc {
     	struct virtio_softc	sc_sc;
    @@ -151,6 +154,7 @@ const struct virtio_ops virtio_mmio_ops = {
     	virtio_mmio_set_status,
     	virtio_mmio_negotiate_features,
     	virtio_mmio_intr,
    +	virtio_mmio_intr_establish,
     };
     
     uint16_t
    @@ -522,3 +526,11 @@ virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx)
     	bus_space_write_4(sc->sc_iot, sc->sc_ioh, VIRTIO_MMIO_QUEUE_NOTIFY,
     	    idx);
     }
    +
    +int
    +virtio_mmio_intr_establish(struct virtio_softc *vsc,
    +    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
    +    int (*func)(void *), void *arg)
    +{
    +	return ENXIO;
    +}
    diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
    index 4a0c9037cf7..15c00f859f6 100644
    --- a/sys/dev/pci/virtio_pci.c
    +++ b/sys/dev/pci/virtio_pci.c
    @@ -50,7 +50,7 @@
      * XXX: PCI-endian while the device specific registers are native endian.
      */
     
    -#define MAX_MSIX_VECS	8
    +#define MAX_MSIX_VECS	16
     
     struct virtio_pci_softc;
     struct virtio_pci_attach_args;
    @@ -62,7 +62,7 @@ int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p
     int		virtio_pci_detach(struct device *, int);
     
     void		virtio_pci_kick(struct virtio_softc *, uint16_t);
    -int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
    +int		virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset);
     uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
     uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
     uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
    @@ -80,8 +80,9 @@ int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe
     int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
     void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
     void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
    -int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *);
    +int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
     int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int);
    +int		virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
     void		virtio_pci_free_irqs(struct virtio_pci_softc *);
     int		virtio_pci_poll_intr(void *);
     int		virtio_pci_legacy_intr(void *);
    @@ -98,6 +99,7 @@ enum irq_type {
     	IRQ_NO_MSIX,
     	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
     	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
    +	IRQ_MSIX_CHILD,  /* assigned by child driver */
     };
     
     struct virtio_pci_intr {
    @@ -175,6 +177,7 @@ const struct virtio_ops virtio_pci_ops = {
     	virtio_pci_set_status,
     	virtio_pci_negotiate_features,
     	virtio_pci_poll_intr,
    +	virtio_pci_intr_establish,
     };
     
     static inline uint64_t
    @@ -646,9 +649,8 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux)
     		goto fail_0;
     	}
     
    -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
     	sc->sc_irq_type = IRQ_NO_MSIX;
    -	if (virtio_pci_adjust_config_region(sc) != 0)
    +	if (virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0)
     		goto fail_0;
     
     	virtio_device_reset(vsc);
    @@ -670,7 +672,9 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux)
     		goto fail_1;
     	}
     
    -	if (virtio_pci_setup_msix(sc, &vpa, 0) == 0) {
    +	if (sc->sc_irq_type == IRQ_MSIX_CHILD) {
    +		intrstr = "msix";
    +	} else if (virtio_pci_setup_msix(sc, &vpa, 0) == 0) {
     		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
     		intrstr = "msix per-VQ";
     	} else if (virtio_pci_setup_msix(sc, &vpa, 1) == 0) {
    @@ -738,11 +742,14 @@ virtio_pci_detach(struct device *self, int flags)
     }
     
     int
    -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
    +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset)
     {
     	if (sc->sc_sc.sc_version_1)
     		return 0;
    -	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
    +	if (sc->sc_devcfg_offset == offset)
    +		return 0;
    +	sc->sc_devcfg_offset = offset;
    +	sc->sc_devcfg_iosize = sc->sc_iosize - offset;
     	sc->sc_devcfg_iot = sc->sc_iot;
     	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
     	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
    @@ -937,30 +944,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc,
     
     int
     virtio_pci_msix_establish(struct virtio_pci_softc *sc,
    -    struct virtio_pci_attach_args *vpa, int idx,
    +    struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci,
         int (*handler)(void *), void *ih_arg)
     {
     	struct virtio_softc *vsc = &sc->sc_sc;
     	pci_intr_handle_t ih;
    +	int r;
     
     	KASSERT(idx < sc->sc_nintr);
     
    -	if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) {
    +	r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih);
    +	if (r != 0) {
     #if VIRTIO_DEBUG
     		printf("%s[%d]: pci_intr_map_msix failed\n",
     		    vsc->sc_dev.dv_xname, idx);
     #endif
    -		return 1;
    +		return r;
     	}
     	snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d",
     	    vsc->sc_child->dv_xname, idx);
    -	sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
    -	    handler, ih_arg, sc->sc_intr[idx].name);
    +	sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl,
    +	    ci, handler, ih_arg, sc->sc_intr[idx].name);
     	if (sc->sc_intr[idx].ih == NULL) {
     		printf("%s[%d]: couldn't establish msix interrupt\n",
    -		    vsc->sc_dev.dv_xname, idx);
    -		return 1;
    +		    vsc->sc_child->dv_xname, idx);
    +		return ENOMEM;
     	}
    +	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI);
     	return 0;
     }
     
    @@ -1010,8 +1020,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc)
     		}
     	}
     
    -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
    -	virtio_pci_adjust_config_region(sc);
    +	/* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? */
    +	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI);
     }
     
     int
    @@ -1019,34 +1029,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
         struct virtio_pci_attach_args *vpa, int shared)
     {
     	struct virtio_softc *vsc = &sc->sc_sc;
    -	int i;
    +	int i, r = 0;
     
     	/* Shared needs config + queue */
     	if (shared && vpa->vpa_va.va_nintr < 1 + 1)
    -		return 1;
    +		return ERANGE;
     	/* Per VQ needs config + N * queue */
     	if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs)
    -		return 1;
    +		return ERANGE;
     
    -	if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc))
    -		return 1;
    -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
    -	virtio_pci_adjust_config_region(sc);
    +	r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc);
    +	if (r != 0)
    +		return r;
     
     	if (shared) {
    -		if (virtio_pci_msix_establish(sc, vpa, 1,
    -		    virtio_pci_shared_queue_intr, vsc)) {
    +		r = virtio_pci_msix_establish(sc, vpa, 1, NULL,
    +		    virtio_pci_shared_queue_intr, vsc);
    +		if (r != 0)
     			goto fail;
    -		}
     
     		for (i = 0; i < vsc->sc_nvqs; i++)
     			vsc->sc_vqs[i].vq_intr_vec = 1;
     	} else {
     		for (i = 0; i < vsc->sc_nvqs; i++) {
    -			if (virtio_pci_msix_establish(sc, vpa, i + 1,
    -			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
    +			r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL,
    +			    virtio_pci_queue_intr, &vsc->sc_vqs[i]);
    +			if (r != 0)
     				goto fail;
    -			}
     			vsc->sc_vqs[i].vq_intr_vec = i + 1;
     		}
     	}
    @@ -1054,7 +1063,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
     	return 0;
     fail:
     	virtio_pci_free_irqs(sc);
    -	return 1;
    +	return r;
    +}
    +
    +int
    +virtio_pci_intr_establish(struct virtio_softc *vsc,
    +    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
    +    int (*func)(void *), void *arg)
    +{
    +	struct virtio_pci_attach_args *vpa;
    +	struct virtio_pci_softc *sc;
    +
    +	if (vsc->sc_ops != &virtio_pci_ops)
    +		return ENXIO;
    +
    +	vpa = (struct virtio_pci_attach_args *)va;
    +	sc = (struct virtio_pci_softc *)vsc;
    +
    +	if (vec >= sc->sc_nintr || sc->sc_nintr <= 1)
    +		return ERANGE;
    +
    +	sc->sc_irq_type = IRQ_MSIX_CHILD;
    +	return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg);
     }
     
     /*
    diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
    index 7a37400584b..159a5c5d1dd 100644
    --- a/sys/dev/pv/if_vio.c
    +++ b/sys/dev/pv/if_vio.c
    @@ -32,7 +32,9 @@
     #include <sys/param.h>
     #include <sys/systm.h>
     #include <sys/device.h>
    +#include <sys/intrmap.h>
     #include <sys/mbuf.h>
    +#include <sys/mutex.h>
     #include <sys/sockio.h>
     #include <sys/timeout.h>
     
    @@ -63,8 +65,15 @@
      * if_vioreg.h:
      */
     /* Configuration registers */
    -#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
    -#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
    +#define VIRTIO_NET_CONFIG_MAC		 0 /*  8 bit x 6 byte */
    +#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16 bit */
    +#define VIRTIO_NET_CONFIG_MAX_QUEUES	 8 /* 16 bit */
    +#define VIRTIO_NET_CONFIG_MTU		10 /* 16 bit */
    +#define VIRTIO_NET_CONFIG_SPEED		12 /* 32 bit */
    +#define VIRTIO_NET_CONFIG_DUPLEX	16 /*  8 bit */
    +#define VIRTIO_NET_CONFIG_RSS_SIZE	17 /*  8 bit */
    +#define VIRTIO_NET_CONFIG_RSS_LEN	18 /* 16 bit */
    +#define VIRTIO_NET_CONFIG_HASH_TYPES	20 /* 16 bit */
     
     /* Feature bits */
     #define VIRTIO_NET_F_CSUM			(1ULL<<0)
    @@ -182,6 +191,11 @@ struct virtio_net_ctrl_cmd {
     # define VIRTIO_NET_CTRL_VLAN_ADD	0
     # define VIRTIO_NET_CTRL_VLAN_DEL	1
     
    +#define VIRTIO_NET_CTRL_MQ		4
    +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
    +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG		1
    +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG		2
    +
     #define VIRTIO_NET_CTRL_GUEST_OFFLOADS	5
     # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0
     
    @@ -195,6 +209,12 @@ struct virtio_net_ctrl_rx {
     	uint8_t	onoff;
     } __packed;
     
    +struct virtio_net_ctrl_mq_pairs_set {
    +	uint16_t virtqueue_pairs;
    +};
    +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
    +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
    +
     struct virtio_net_ctrl_guest_offloads {
     	uint64_t offloads;
     } __packed;
    @@ -224,9 +244,13 @@ struct vio_queue {
     	struct mbuf		**viq_rxmbufs;
     	struct mbuf		**viq_txmbufs;
     	struct if_rxring	  viq_rxring;
    +	struct ifiqueue		 *viq_ifiq;
    +	struct ifqueue		 *viq_ifq;
     	struct virtqueue	 *viq_rxvq;
     	struct virtqueue	 *viq_txvq;
    -};
    +	struct mutex		  viq_txmtx, viq_rxmtx;
    +	int			  viq_txfree_slots;
    +} __aligned(64);
     
     struct vio_softc {
     	struct device		sc_dev;
    @@ -246,16 +270,20 @@ struct vio_softc {
     	caddr_t			sc_dma_kva;
     
     	int			sc_hdr_size;
    -	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
    -	struct virtio_net_ctrl_status *sc_ctrl_status;
    -	struct virtio_net_ctrl_rx *sc_ctrl_rx;
    -	struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
    -	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
    +	struct virtio_net_ctrl_cmd		*sc_ctrl_cmd;
    +	struct virtio_net_ctrl_status		*sc_ctrl_status;
    +	struct virtio_net_ctrl_rx		*sc_ctrl_rx;
    +	struct virtio_net_ctrl_mq_pairs_set	*sc_ctrl_mq_pairs;
    +	struct virtio_net_ctrl_guest_offloads	*sc_ctrl_guest_offloads;
    +	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_uc;
     #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
    -	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
    +	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_mc;
     
    +	struct intrmap		*sc_intrmap;
     	struct vio_queue	*sc_q;
     	uint16_t		sc_nqueues;
    +	int			sc_tx_slots_per_req;
    +	int			sc_rx_mbuf_size;
     
     	enum vio_ctrl_state	sc_ctrl_inuse;
     
    @@ -269,7 +297,6 @@ struct vio_softc {
     #define VIO_HAVE_MRG_RXBUF(sc)					\
     	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))
     
    -#define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
     #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
     #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */
     #define VIRTIO_NET_CTRL_TIMEOUT		(5*1000*1000*1000ULL) /* 5 seconds */
    @@ -286,7 +313,7 @@ void	vio_attach(struct device *, struct device *, void *);
     /* ifnet interface functions */
     int	vio_init(struct ifnet *);
     void	vio_stop(struct ifnet *, int);
    -void	vio_start(struct ifnet *);
    +void	vio_start(struct ifqueue *);
     int	vio_ioctl(struct ifnet *, u_long, caddr_t);
     void	vio_get_lladdr(struct arpcom *ac, struct virtio_softc *vsc);
     void	vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc);
    @@ -302,25 +329,32 @@ void	vio_rxtick(void *);
     
     /* tx */
     int	vio_tx_intr(struct virtqueue *);
    +int	vio_tx_dequeue(struct virtqueue *);
     int	vio_txeof(struct virtqueue *);
     void	vio_tx_drain(struct vio_softc *);
     int	vio_encap(struct vio_queue *, int, struct mbuf *);
     void	vio_txtick(void *);
     
    +int	vio_queue_intr(void *);
    +int	vio_config_intr(void *);
    +int	vio_ctrl_intr(void *);
    +
     /* other control */
     void	vio_link_state(struct ifnet *);
     int	vio_config_change(struct virtio_softc *);
     int	vio_ctrl_rx(struct vio_softc *, int, int);
    +int	vio_ctrl_mq(struct vio_softc *);
     int	vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
     int	vio_set_rx_filter(struct vio_softc *);
     void	vio_iff(struct vio_softc *);
     int	vio_media_change(struct ifnet *);
     void	vio_media_status(struct ifnet *, struct ifmediareq *);
     int	vio_ctrleof(struct virtqueue *);
    -int	vio_wait_ctrl(struct vio_softc *sc);
    -int	vio_wait_ctrl_done(struct vio_softc *sc);
    +int	vio_ctrl_start(struct vio_softc *, uint8_t, uint8_t, int, int *);
    +int	vio_ctrl_submit(struct vio_softc *, int);
    +void	vio_ctrl_finish(struct vio_softc *);
     void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
    -int	vio_alloc_mem(struct vio_softc *);
    +int	vio_alloc_mem(struct vio_softc *, int);
     int	vio_alloc_dmamem(struct vio_softc *);
     void	vio_free_dmamem(struct vio_softc *);
     
    @@ -397,6 +431,8 @@ vio_free_dmamem(struct vio_softc *sc)
      *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
      *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
      *			 (WRITE)
     + *   sc_ctrl_mq_pairs: set number of rx/tx queue pairs (WRITE)
    + *   sc_ctrl_guest_offloads: configure offload features (WRITE)
      *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
      *			 class command (WRITE)
      *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
    @@ -415,7 +451,7 @@ vio_free_dmamem(struct vio_softc *sc)
      *   viq_txmbufs[slot]:		mbuf pointer array for sent frames
      */
     int
    -vio_alloc_mem(struct vio_softc *sc)
    +vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
     {
     	struct virtio_softc	*vsc = sc->sc_virtio;
     	struct ifnet		*ifp = &sc->sc_ac.ac_if;
    @@ -438,6 +474,7 @@ vio_alloc_mem(struct vio_softc *sc)
     		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
     		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
     		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
    +		allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1;
     		allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
     		allocsize += VIO_CTRL_MAC_INFO_SIZE;
     	}
    @@ -463,6 +500,8 @@ vio_alloc_mem(struct vio_softc *sc)
     		offset += sizeof(*sc->sc_ctrl_status);
     		sc->sc_ctrl_rx = (void *)(kva + offset);
     		offset += sizeof(*sc->sc_ctrl_rx);
    +		sc->sc_ctrl_mq_pairs = (void *)(kva + offset);
    +		offset += sizeof(*sc->sc_ctrl_mq_pairs);
     		sc->sc_ctrl_guest_offloads = (void *)(kva + offset);
     		offset += sizeof(*sc->sc_ctrl_guest_offloads);
     		sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset);
    @@ -492,8 +531,9 @@ vio_alloc_mem(struct vio_softc *sc)
     		vioq->viq_txmbufs = vioq->viq_rxmbufs + rxqsize;
     
     		for (i = 0; i < rxqsize; i++) {
    -			r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES,
    -			    MAXMCLBYTES/PAGE_SIZE + 1, MCLBYTES, 0,
    +			r = bus_dmamap_create(vsc->sc_dmat,
    +			    sc->sc_rx_mbuf_size + sc->sc_hdr_size, 2,
    +			    sc->sc_rx_mbuf_size, 0,
     			    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
     			    &vioq->viq_rxdmamaps[i]);
     			if (r != 0)
    @@ -502,7 +542,7 @@ vio_alloc_mem(struct vio_softc *sc)
     
     		for (i = 0; i < txqsize; i++) {
     			r = bus_dmamap_create(vsc->sc_dmat, txsize,
    -			    VIRTIO_NET_TX_MAXNSEGS, txsize, 0,
    +			    tx_max_segments, txsize, 0,
     			    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
     			    &vioq->viq_txdmamaps[i]);
     			if (r != 0)
    @@ -585,7 +625,8 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     {
     	struct vio_softc *sc = (struct vio_softc *)self;
     	struct virtio_softc *vsc = (struct virtio_softc *)parent;
    -	int i;
    +	struct virtio_attach_args *va = aux;
    +	int i, r, tx_max_segments;
     	struct ifnet *ifp = &sc->sc_ac.ac_if;
     
     	if (vsc->sc_child != NULL) {
    @@ -597,13 +638,16 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     	sc->sc_virtio = vsc;
     
     	vsc->sc_child = self;
    -	vsc->sc_ipl = IPL_NET;
    +	vsc->sc_ipl = IPL_NET | IPL_MPSAFE;
     	vsc->sc_config_change = NULL;
     	vsc->sc_driver_features = VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS |
     	    VIRTIO_NET_F_CTRL_VQ | VIRTIO_NET_F_CTRL_RX |
     	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
     	    VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM;
     
    +	if (va->va_nintr > 3)
    +		vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
    +
     	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
     	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
     
    @@ -613,10 +657,23 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     
     	virtio_negotiate_features(vsc, virtio_net_feature_names);
     
    -	sc->sc_nqueues = 1;
    -	vsc->sc_nvqs = 2 * sc->sc_nqueues;
    -	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    -		vsc->sc_nvqs++;
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    +		i = virtio_read_device_config_2(vsc,
    +		    VIRTIO_NET_CONFIG_MAX_QUEUES);
    +		vsc->sc_nvqs = 2 * i + 1;
    +		i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
    +		sc->sc_intrmap = intrmap_create(&sc->sc_dev, i,
    +		    va->va_nintr - 2, 0);
    +		sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
    +		printf(": %u queue%s", sc->sc_nqueues,
     +		    sc->sc_nqueues > 1 ? "s" : "");
    +	} else {
    +		sc->sc_nqueues = 1;
    +		printf(": 1 queue");
    +		vsc->sc_nvqs = 2;
    +		if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    +			vsc->sc_nvqs++;
    +	}
     
     	vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF,
     	    M_WAITOK|M_ZERO);
    @@ -644,16 +701,60 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     	} else {
     		sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
     	}
    +
    +	ifp->if_capabilities = 0;
    +	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    +	ifp->if_xflags = IFXF_MPSAFE;
    +#if NVLAN > 0
    +	ifp->if_capabilities |= IFCAP_VLAN_MTU;
    +	ifp->if_capabilities |= IFCAP_VLAN_HWOFFLOAD;
    +#endif
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
    +		ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|
    +		    IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6;
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4))
    +		ifp->if_capabilities |= IFCAP_TSOv4;
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
    +		ifp->if_capabilities |= IFCAP_TSOv6;
    +
    +	sc->sc_rx_mbuf_size = MCLBYTES;
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) &&
    +	    (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
    +	     virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) {
    +		ifp->if_xflags |= IFXF_LRO;
    +		ifp->if_capabilities |= IFCAP_LRO;
    +		sc->sc_rx_mbuf_size = 4 * 1024;
    +	}
    +
     	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
     		ifp->if_hardmtu = MAXMCLBYTES;
     	else
    -		ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
    +		ifp->if_hardmtu = sc->sc_rx_mbuf_size - sc->sc_hdr_size -
    +		    ETHER_HDR_LEN;
    +
    +	/* defrag for longer mbuf chains */
    +	tx_max_segments = 16;
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4) ||
    +	    virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) {
    +		/*
    +		 * With TSO, we may get 64K packets and want to be able to
    +		 * send longer chains without defragmenting
    +		 */
    +		tx_max_segments = 32;
    +	}
    +
    +	if (virtio_has_feature(vsc, VIRTIO_F_RING_INDIRECT_DESC))
    +		sc->sc_tx_slots_per_req = 1;
    +	else
    +		sc->sc_tx_slots_per_req = tx_max_segments + 1;
     
     	for (i = 0; i < sc->sc_nqueues; i++) {
     		int vqidx = 2 * i;
     		struct vio_queue *vioq = &sc->sc_q[i];
     
     		vioq->viq_rxvq = &vsc->sc_vqs[vqidx];
    +		mtx_init(&vioq->viq_txmtx, IPL_NET);
    +		mtx_init(&vioq->viq_rxmtx, IPL_NET);
     		vioq->viq_sc = sc;
     		if (virtio_alloc_vq(vsc, vioq->viq_rxvq, vqidx, 2, "rx") != 0)
     			goto err;
    @@ -663,7 +764,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     		vqidx++;
     		vioq->viq_txvq = &vsc->sc_vqs[vqidx];
     		if (virtio_alloc_vq(vsc, vioq->viq_txvq, vqidx,
    -		    VIRTIO_NET_TX_MAXNSEGS + 1, "tx") != 0) {
    +		    tx_max_segments + 1, "tx") != 0) {
     			goto err;
     		}
     		vioq->viq_txvq->vq_done = vio_tx_intr;
    @@ -671,57 +772,102 @@ vio_attach(struct device *parent, struct device *self, void *aux)
     			virtio_postpone_intr_far(vioq->viq_txvq);
     		else
     			virtio_stop_vq_intr(vsc, vioq->viq_txvq);
    +		vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1;
    +		KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req);
    +		if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) {
     +			printf("unequal tx queue size %d: %d != %d\n", i,
    +			    vioq->viq_txvq->vq_num,
    +			    sc->sc_q[0].viq_txvq->vq_num);
    +			goto err;
    +		}
    +		DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq,
    +		    vioq->viq_txvq);
    +
    +		if (sc->sc_intrmap != NULL) {
    +			vioq->viq_rxvq->vq_intr_vec = i + 2;
    +			vioq->viq_txvq->vq_intr_vec = i + 2;
    +		}
     	}
     
     	/* control queue */
     	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) {
    -		sc->sc_ctl_vq = &vsc->sc_vqs[2];
    -		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1,
    -		    "control") != 0)
    +		i = 2;
    +		if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    +			i = 2 * virtio_read_device_config_2(vsc,
    +			    VIRTIO_NET_CONFIG_MAX_QUEUES);
    +		}
     +		sc->sc_ctl_vq = &vsc->sc_vqs[i];
    +		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0)
     			goto err;
     		sc->sc_ctl_vq->vq_done = vio_ctrleof;
    +		if (sc->sc_intrmap != NULL)
    +			sc->sc_ctl_vq->vq_intr_vec = 1;
     		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
     	}
     
    -	if (vio_alloc_mem(sc) < 0)
    +	if (sc->sc_intrmap) {
    +		r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr,
    +		    vsc);
    +		if (r != 0) {
    +			printf("%s: cannot alloc config intr: %d\n",
    +			    sc->sc_dev.dv_xname, r);
    +			goto err;
    +		}
    +		r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr,
    +		    sc->sc_ctl_vq);
    +		if (r != 0) {
    +			printf("%s: cannot alloc ctrl intr: %d\n",
    +			    sc->sc_dev.dv_xname, r);
    +			goto err;
    +		}
    +		for (i = 0; i < sc->sc_nqueues; i++) {
    +			struct cpu_info *ci = NULL;
    +			ci = intrmap_cpu(sc->sc_intrmap, i);
    +			r = virtio_intr_establish(vsc, va, i + 2, ci,
    +			    vio_queue_intr, &sc->sc_q[i]);
    +			if (r != 0) {
    +				printf("%s: cannot alloc q%d intr: %d\n",
    +				    sc->sc_dev.dv_xname, i, r);
    +				goto err;
    +			}
    +		}
    +	}
    +
    +	if (vio_alloc_mem(sc, tx_max_segments) < 0)
     		goto err;
     
     	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
     	ifp->if_softc = sc;
    -	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    -	ifp->if_start = vio_start;
    +	ifp->if_qstart = vio_start;
     	ifp->if_ioctl = vio_ioctl;
    -	ifp->if_capabilities = 0;
    -#if NVLAN > 0
    -	ifp->if_capabilities |= IFCAP_VLAN_MTU;
    -	ifp->if_capabilities |= IFCAP_VLAN_HWOFFLOAD;
    -#endif
    -	if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
    -		ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|
    -		    IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6;
    -	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4))
    -		ifp->if_capabilities |= IFCAP_TSOv4;
    -	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
    -		ifp->if_capabilities |= IFCAP_TSOv6;
    -
    -	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) &&
    -	    (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
    -	     virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) {
    -		ifp->if_xflags |= IFXF_LRO;
    -		ifp->if_capabilities |= IFCAP_LRO;
    -	}
     
     	ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
     	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
     	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
     	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
     	vsc->sc_config_change = vio_config_change;
    -	timeout_set(&sc->sc_txtick, vio_txtick, sc->sc_q[0].viq_txvq);
    -	timeout_set(&sc->sc_rxtick, vio_rxtick, sc->sc_q[0].viq_rxvq);
    +	timeout_set(&sc->sc_txtick, vio_txtick, sc);
    +	timeout_set(&sc->sc_rxtick, vio_rxtick, sc);
     
     	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
    +
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    +		/* ctrl queue works only after DRIVER_OK */
    +		vio_ctrl_mq(sc);
    +	}
    +
     	if_attach(ifp);
     	ether_ifattach(ifp);
    +	vio_link_state(ifp);
    +
    +	if_attach_queues(ifp, sc->sc_nqueues);
    +	if_attach_iqueues(ifp, sc->sc_nqueues);
    +
    +	for (i = 0; i < sc->sc_nqueues; i++) {
    +		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i];
    +		sc->sc_q[i].viq_ifq = ifp->if_ifqs[i];
    +		sc->sc_q[i].viq_ifiq = ifp->if_iqs[i];
    +	}
     
     	return;
     
    @@ -755,12 +901,41 @@ vio_link_state(struct ifnet *ifp)
     	}
     }
     
    +/* interrupt handlers for multi-queue */
    +int
    +vio_queue_intr(void *arg)
    +{
    +	struct vio_queue *vioq = arg;
    +	struct virtio_softc *vsc = vioq->viq_sc->sc_virtio;
    +	int r;
    +	r = virtio_check_vq(vsc, vioq->viq_txvq);
    +	r |= virtio_check_vq(vsc, vioq->viq_rxvq);
    +	return r;
    +}
    +
    +int
    +vio_config_intr(void *arg)
    +{
    +	struct virtio_softc *vsc = arg;
    +	return vio_config_change(vsc);
    +}
    +
    +int
    +vio_ctrl_intr(void *arg)
    +{
    +	struct virtqueue *vq = arg;
    +	return virtio_check_vq(vq->vq_owner, vq);
    +}
    +
    +
     int
     vio_config_change(struct virtio_softc *vsc)
     {
     	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    +	KERNEL_LOCK();
     	vio_link_state(&sc->sc_ac.ac_if);
     	vio_needs_reset(sc);
    +	KERNEL_UNLOCK();
     	return 1;
     }
     
    @@ -796,12 +971,14 @@ vio_init(struct ifnet *ifp)
     	for (qidx = 0; qidx < sc->sc_nqueues; qidx++) {
     		struct vio_queue *vioq = &sc->sc_q[qidx];
     
    +		mtx_enter(&vioq->viq_rxmtx);
     		if_rxr_init(&vioq->viq_rxring,
    -		    2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
    +		    2 * ((ifp->if_hardmtu / sc->sc_rx_mbuf_size) + 1),
     		    vioq->viq_rxvq->vq_num);
     		vio_populate_rx_mbufs(sc, vioq);
    +		ifq_clr_oactive(vioq->viq_ifq);
    +		mtx_leave(&vioq->viq_rxmtx);
     	}
    -	ifq_clr_oactive(&ifp->if_snd);
     	vio_iff(sc);
     	vio_link_state(ifp);
     
    @@ -836,11 +1013,13 @@ vio_stop(struct ifnet *ifp, int disable)
     	CLR(ifp->if_flags, IFF_RUNNING);
     	timeout_del(&sc->sc_txtick);
     	timeout_del(&sc->sc_rxtick);
    -	ifq_clr_oactive(&ifp->if_snd);
     	/* only way to stop I/O and DMA is resetting... */
     	virtio_reset(vsc);
    -	for (i = 0; i < sc->sc_nqueues; i++)
    +	for (i = 0; i < sc->sc_nqueues; i++) {
    +		mtx_enter(&sc->sc_q[i].viq_rxmtx);
     		vio_rxeof(&sc->sc_q[i]);
    +		mtx_leave(&sc->sc_q[i].viq_rxmtx);
    +	}
     
     	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
     		vio_ctrl_wakeup(sc, RESET);
    @@ -856,6 +1035,8 @@ vio_stop(struct ifnet *ifp, int disable)
     	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
     		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
     	virtio_reinit_end(vsc);
    +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ))
    +		vio_ctrl_mq(sc);
     	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
     		vio_ctrl_wakeup(sc, FREE);
     }
    @@ -942,35 +1123,42 @@ vio_tx_offload(struct virtio_net_hdr *hdr, struct mbuf *m)
     }
     
     void
    -vio_start(struct ifnet *ifp)
    +vio_start(struct ifqueue *viq_ifq)
     {
    +	struct ifnet *ifp = viq_ifq->ifq_if;
    +	struct vio_queue *vioq = viq_ifq->ifq_softc;
     	struct vio_softc *sc = ifp->if_softc;
     	struct virtio_softc *vsc = sc->sc_virtio;
    -	struct vio_queue *vioq = &sc->sc_q[0];
     	struct virtqueue *vq = vioq->viq_txvq;
     	struct mbuf *m;
    -	int queued = 0;
    +	int queued = 0, free_slots, used_slots;
     
    -	vio_txeof(vq);
    +	mtx_enter(&vioq->viq_txmtx);
    +	vio_tx_dequeue(vq);
     
    -	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
    -		return;
    -	if (ifq_empty(&ifp->if_snd))
    -		return;
     
     again:
    +	free_slots = vioq->viq_txfree_slots;
    +	KASSERT(free_slots >= 0);
    +	used_slots = 0;
     	for (;;) {
     		int slot, r;
     		struct virtio_net_hdr *hdr;
     
    -		m = ifq_deq_begin(&ifp->if_snd);
    +		if (free_slots - used_slots < sc->sc_tx_slots_per_req) {
    +			ifq_set_oactive(viq_ifq);
    +			break;
    +		}
    +
    +		m = ifq_dequeue(viq_ifq);
     		if (m == NULL)
     			break;
     
     		r = virtio_enqueue_prep(vq, &slot);
     		if (r == EAGAIN) {
    -			ifq_deq_rollback(&ifp->if_snd, m);
    -			ifq_set_oactive(&ifp->if_snd);
    +			printf("%s: virtio_enqueue_prep failed?\n", __func__);
    +			m_freem(m);
    +			viq_ifq->ifq_errors++;
     			break;
     		}
     		if (r != 0)
    @@ -984,22 +1172,27 @@ again:
     		r = vio_encap(vioq, slot, m);
     		if (r != 0) {
     			virtio_enqueue_abort(vq, slot);
    -			ifq_deq_commit(&ifp->if_snd, m);
     			m_freem(m);
    -			ifp->if_oerrors++;
    +			viq_ifq->ifq_errors++;
     			continue;
     		}
     		r = virtio_enqueue_reserve(vq, slot,
     		    vioq->viq_txdmamaps[slot]->dm_nsegs + 1);
     		if (r != 0) {
    +			printf("%s: virtio_enqueue_reserve failed?\n",
    +			    __func__);
    +			m_freem(m);
    +			viq_ifq->ifq_errors++;
     			bus_dmamap_unload(vsc->sc_dmat,
     			    vioq->viq_txdmamaps[slot]);
    -			ifq_deq_rollback(&ifp->if_snd, m);
     			vioq->viq_txmbufs[slot] = NULL;
    -			ifq_set_oactive(&ifp->if_snd);
     			break;
     		}
    -		ifq_deq_commit(&ifp->if_snd, m);
    +		if (sc->sc_tx_slots_per_req == 1)
    +			used_slots++;
    +		else
    +			used_slots += vioq->viq_txdmamaps[slot]->dm_nsegs + 1;
    +
     
     		bus_dmamap_sync(vsc->sc_dmat, vioq->viq_txdmamaps[slot], 0,
     		    vioq->viq_txdmamaps[slot]->dm_mapsize,
    @@ -1013,14 +1206,22 @@ again:
     			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
     #endif
     	}
    -	if (ifq_is_oactive(&ifp->if_snd)) {
     +	if (used_slots > 0) {
    +		if (used_slots > vioq->viq_txfree_slots)
    +			printf("%s: used_slots %d viq_txfree_slots %d "
    +			    "free_slots %d\n", __func__, used_slots,
    +			    vioq->viq_txfree_slots, free_slots);
    +		vioq->viq_txfree_slots -= used_slots;
    +		KASSERT(vioq->viq_txfree_slots >= 0);
    +	}
    +	if (ifq_is_oactive(viq_ifq)) {
     		int r;
     		if (virtio_has_feature(vsc, VIRTIO_F_RING_EVENT_IDX))
    -			r = virtio_postpone_intr_smart(vioq->viq_txvq);
    +			r = virtio_postpone_intr_smart(vq);
     		else
    -			r = virtio_start_vq_intr(vsc, vioq->viq_txvq);
    +			r = virtio_start_vq_intr(vsc, vq);
     		if (r) {
    -			vio_txeof(vq);
    +			vio_tx_dequeue(vq);
     			goto again;
     		}
     	}
    @@ -1029,6 +1230,7 @@ again:
     		virtio_notify(vsc, vq);
     		timeout_add_sec(&sc->sc_txtick, 1);
     	}
    +	mtx_leave(&vioq->viq_txmtx);
     }
     
     #if VIRTIO_DEBUG
    @@ -1041,9 +1243,11 @@ vio_dump(struct vio_softc *sc)
     
     	printf("%s status dump:\n", ifp->if_xname);
     	printf("tx tick active: %d\n", !timeout_triggered(&sc->sc_txtick));
    +	printf("max tx slots per req %d\n", sc->sc_tx_slots_per_req);
     	printf("rx tick active: %d\n", !timeout_triggered(&sc->sc_rxtick));
     	for (i = 0; i < sc->sc_nqueues; i++) {
     		printf("%d: TX virtqueue:\n", i);
    +		printf("  tx free slots %d\n", sc->sc_q[i].viq_txfree_slots);
     		virtio_vq_dump(sc->sc_q[i].viq_txvq);
     		printf("%d: RX virtqueue:\n", i);
     		virtio_vq_dump(sc->sc_q[i].viq_rxvq);
    @@ -1056,6 +1260,33 @@ vio_dump(struct vio_softc *sc)
     }
     #endif
     
    +static int
    +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri)
    +{
    +	struct if_rxring_info *ifrs, *ifr;
    +	int error;
    +	unsigned int i;
    +
    +	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
    +	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
    +	if (ifrs == NULL)
    +		return (ENOMEM);
    +
    +	for (i = 0; i < sc->sc_nqueues; i++) {
    +		ifr = &ifrs[i];
    +
    +		ifr->ifr_size = sc->sc_rx_mbuf_size;
    +		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
    +		ifr->ifr_info = sc->sc_q[i].viq_rxring;
    +	}
    +
    +	error = if_rxr_info_ioctl(ifri, i, ifrs);
    +
    +	free(ifrs, M_TEMP, i * sizeof(*ifrs));
    +
    +	return (error);
    +}
    +
     int
     vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
     {
    @@ -1090,8 +1321,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
     		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
     		break;
     	case SIOCGIFRXR:
    -		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
    -		    NULL, MCLBYTES, &sc->sc_q[0].viq_rxring);
    +		r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
     		break;
     	default:
     		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
    @@ -1116,7 +1346,7 @@ vio_add_rx_mbuf(struct vio_softc *sc, struct vio_queue *vioq, int i)
     	struct mbuf *m;
     	int r;
     
    -	m = MCLGETL(NULL, M_DONTWAIT, MCLBYTES);
    +	m = MCLGETL(NULL, M_DONTWAIT, sc->sc_rx_mbuf_size);
     	if (m == NULL)
     		return ENOBUFS;
     	vioq->viq_rxmbufs[i] = m;
    @@ -1152,6 +1382,7 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
     	struct virtqueue *vq = vioq->viq_rxvq;
     	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
     
    +	MUTEX_ASSERT_LOCKED(&vioq->viq_rxmtx);
     	for (slots = if_rxr_get(&vioq->viq_rxring, vq->vq_num);
     	    slots > 0; slots--) {
     		int slot;
    @@ -1188,7 +1419,8 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
     			virtio_enqueue_p(vq, slot, vioq->viq_rxdmamaps[slot],
     			    0, sc->sc_hdr_size, 0);
     			virtio_enqueue_p(vq, slot, vioq->viq_rxdmamaps[slot],
    -			    sc->sc_hdr_size, MCLBYTES - sc->sc_hdr_size, 0);
    +			    sc->sc_hdr_size,
    +			    sc->sc_rx_mbuf_size - sc->sc_hdr_size, 0);
     		}
     		virtio_enqueue_commit(vsc, vq, slot, 0);
     		done = 1;
    @@ -1253,6 +1485,7 @@ vio_rxeof(struct vio_queue *vioq)
     	int slot, len, bufs_left;
     	struct virtio_net_hdr *hdr;
     
    +	MUTEX_ASSERT_LOCKED(&vioq->viq_rxmtx);
     	while (virtio_dequeue(vsc, vioq->viq_rxvq, &slot, &len) == 0) {
     		r = 1;
     		bus_dmamap_sync(vsc->sc_dmat, vioq->viq_rxdmamaps[slot], 0,
    @@ -1296,7 +1529,7 @@ vio_rxeof(struct vio_queue *vioq)
     		m_freem(m0);
     	}
     
    -	if (ifiq_input(&ifp->if_rcv, &ml))
    +	if (ifiq_input(vioq->viq_ifiq, &ml))
     		if_rxr_livelocked(&vioq->viq_rxring);
     
     	return r;
    @@ -1311,6 +1544,7 @@ vio_rx_intr(struct virtqueue *vq)
     	struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2];
     	int r, sum = 0;
     
    +	mtx_enter(&vioq->viq_rxmtx);
     again:
     	r = vio_rxeof(vioq);
     	sum += r;
    @@ -1323,24 +1557,21 @@ again:
     		}
     	}
     
    +	mtx_leave(&vioq->viq_rxmtx);
     	return sum;
     }
     
     void
     vio_rxtick(void *arg)
     {
    -	struct virtqueue *vq = arg;
    -	struct virtio_softc *vsc = vq->vq_owner;
    -	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    -	struct vio_queue *vioq;
    -	int s, qidx;
    +	struct vio_softc *sc = arg;
    +	int i;
     
    -	s = splnet();
    -	for (qidx = 0; qidx < sc->sc_nqueues; qidx++) {
    -		vioq = &sc->sc_q[qidx];
    -		vio_populate_rx_mbufs(sc, vioq);
    +	for (i = 0; i < sc->sc_nqueues; i++) {
    +		mtx_enter(&sc->sc_q[i].viq_rxmtx);
    +		vio_populate_rx_mbufs(sc, &sc->sc_q[i]);
    +		mtx_leave(&sc->sc_q[i].viq_rxmtx);
     	}
    -	splx(s);
     }
     
     /* free all the mbufs; called from if_stop(disable) */
    @@ -1375,25 +1606,26 @@ vio_tx_intr(struct virtqueue *vq)
     {
     	struct virtio_softc *vsc = vq->vq_owner;
     	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    -	struct ifnet *ifp = &sc->sc_ac.ac_if;
    +	struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2];
     	int r;
     
     	r = vio_txeof(vq);
    -	vio_start(ifp);
    +	vio_start(vioq->viq_ifq);
     	return r;
     }
     
     void
     vio_txtick(void *arg)
     {
    -	struct virtqueue *vq = arg;
    -	int s = splnet();
    -	virtio_check_vq(vq->vq_owner, vq);
    -	splx(s);
    +	struct vio_softc *sc = arg;
    +	int i;
    +
    +	for (i = 0; i < sc->sc_nqueues; i++)
    +		virtio_check_vq(sc->sc_virtio, sc->sc_q[i].viq_txvq);
     }
     
     int
    -vio_txeof(struct virtqueue *vq)
    +vio_tx_dequeue(struct virtqueue *vq)
     {
     	struct virtio_softc *vsc = vq->vq_owner;
     	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    @@ -1402,8 +1634,9 @@ vio_txeof(struct virtqueue *vq)
     	struct ifnet *ifp = &sc->sc_ac.ac_if;
     	struct mbuf *m;
     	int r = 0;
    -	int slot, len;
    +	int slot, len, freed = 0;
     
    +	MUTEX_ASSERT_LOCKED(&vioq->viq_txmtx);
     	if (!ISSET(ifp->if_flags, IFF_RUNNING))
     		return 0;
     
    @@ -1418,13 +1651,34 @@ vio_txeof(struct virtqueue *vq)
     		m = vioq->viq_txmbufs[slot];
     		bus_dmamap_unload(vsc->sc_dmat, vioq->viq_txdmamaps[slot]);
     		vioq->viq_txmbufs[slot] = NULL;
    -		virtio_dequeue_commit(vq, slot);
    +		freed += virtio_dequeue_commit(vq, slot);
     		m_freem(m);
     	}
    +	KASSERT(vioq->viq_txfree_slots >= 0);
    +	vioq->viq_txfree_slots += freed;
    +	return r;
    +}
    +
    +
    +int
    +vio_txeof(struct virtqueue *vq)
    +{
    +	struct virtio_softc *vsc = vq->vq_owner;
    +	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    +	struct vio_queue *vioq = &sc->sc_q[vq->vq_index/2];
    +	int r;
    +
    +	mtx_enter(&vioq->viq_txmtx);
    +	r = vio_tx_dequeue(vq);
    +	mtx_leave(&vioq->viq_txmtx);
     
     	if (r) {
    -		ifq_clr_oactive(&ifp->if_snd);
    -		virtio_stop_vq_intr(vsc, vioq->viq_txvq);
    +		if (ifq_is_oactive(vioq->viq_ifq)) {
    +			mtx_enter(&vioq->viq_txmtx);
    +			virtio_stop_vq_intr(vsc, vq);
    +			mtx_leave(&vioq->viq_txmtx);
    +			ifq_restart(vioq->viq_ifq);
    +		}
     	}
     	if (vq->vq_used_idx == vq->vq_avail_idx)
     		timeout_del(&sc->sc_txtick);
    @@ -1469,6 +1723,8 @@ vio_tx_drain(struct vio_softc *sc)
     
     	for (q = 0; q < sc->sc_nqueues; q++) {
     		vioq = &sc->sc_q[q];
    +		ifq_barrier(vioq->viq_ifq);
    +		mtx_enter(&vioq->viq_txmtx);
     		for (i = 0; i < vioq->viq_txvq->vq_num; i++) {
     			if (vioq->viq_txmbufs[i] == NULL)
     				continue;
    @@ -1477,156 +1733,217 @@ vio_tx_drain(struct vio_softc *sc)
     			m_freem(vioq->viq_txmbufs[i]);
     			vioq->viq_txmbufs[i] = NULL;
     		}
    +		ifq_purge(vioq->viq_ifq);
    +		ifq_clr_oactive(vioq->viq_ifq);
    +		vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1;
    +		mtx_leave(&vioq->viq_txmtx);
     	}
     }
     
     /*
      * Control vq
      */
    -/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
    +
    +/*
    + * Lock the control queue and the sc_ctrl_* structs and prepare a request.
    + *
    + * If this function succeeds, the caller must also call either
    + * vio_ctrl_submit() or virtio_enqueue_abort(), in both cases followed by
    + * vio_ctrl_finish().
    + */
     int
    -vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
    +vio_ctrl_start(struct vio_softc *sc, uint8_t class, uint8_t cmd, int nslots,
    +    int *slotp)
     {
     	struct virtio_softc *vsc = sc->sc_virtio;
     	struct virtqueue *vq = sc->sc_ctl_vq;
    -	int r, slot;
    +	int r;
     
     	splassert(IPL_NET);
     
    -	if ((r = vio_wait_ctrl(sc)) != 0)
    -		return r;
    +	while (sc->sc_ctrl_inuse != FREE) {
    +		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    +			return ENXIO;
    +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
    +		if (r != 0)
    +			return r;
    +	}
    +	sc->sc_ctrl_inuse = INUSE;
     
    -	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
    +	sc->sc_ctrl_cmd->class = class;
     	sc->sc_ctrl_cmd->command = cmd;
    -	sc->sc_ctrl_rx->onoff = onoff;
     
    -	r = virtio_enqueue_prep(vq, &slot);
    +	r = virtio_enqueue_prep(vq, slotp);
     	if (r != 0)
     		panic("%s: %s virtio_enqueue_prep: control vq busy",
     		    sc->sc_dev.dv_xname, __func__);
    -	r = virtio_enqueue_reserve(vq, slot, 3);
    +	r = virtio_enqueue_reserve(vq, *slotp, nslots + 2);
     	if (r != 0)
     		panic("%s: %s virtio_enqueue_reserve: control vq busy",
     		    sc->sc_dev.dv_xname, __func__);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_cmd,
    +
    +	vio_dmamem_enqueue(vsc, sc, vq, *slotp, sc->sc_ctrl_cmd,
     	    sizeof(*sc->sc_ctrl_cmd), 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_rx,
    -	    sizeof(*sc->sc_ctrl_rx), 1);
    +
    +	return 0;
    +}
    +
    +/*
    + * Submit a control queue request and wait for the result.
    + *
    + * vio_ctrl_start() must have been called successfully.
    + * After vio_ctrl_submit(), the caller may inspect the
    + * data returned from the hypervisor. Afterwards, the caller
    + * must always call vio_ctrl_finish().
    + */
    +int
    +vio_ctrl_submit(struct vio_softc *sc, int slot)
    +{
    +	struct virtio_softc *vsc = sc->sc_virtio;
    +	struct virtqueue *vq = sc->sc_ctl_vq;
    +	int r;
    +
     	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_status,
     	    sizeof(*sc->sc_ctrl_status), 0);
    +
     	virtio_enqueue_commit(vsc, vq, slot, 1);
     
    -	if ((r = vio_wait_ctrl_done(sc)) != 0)
    -		goto out;
    +	while (sc->sc_ctrl_inuse != DONE) {
    +		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    +			return ENXIO;
    +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
    +		    VIRTIO_NET_CTRL_TIMEOUT);
    +		if (r != 0) {
    +			if (r == EWOULDBLOCK)
    +				printf("%s: ctrl queue timeout\n",
    +				    sc->sc_dev.dv_xname);
    +			vio_ctrl_wakeup(sc, RESET);
    +			return ENXIO;
    +		}
    +		if (cold)
    +			virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq);
    +	}
     
     	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
     	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
    -	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
     	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
     	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
     
    -	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
    -		r = 0;
    -	} else {
    -		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
    -		r = EIO;
    -	}
    +	if (sc->sc_ctrl_status->ack != VIRTIO_NET_OK)
    +		return EIO;
    +
    +	return 0;
    +}
    +
    +/*
    + * Unlock the control queue and the sc_ctrl_* structs.
    + *
    + * It is ok to call this function if the control queue is marked dead
    + * due to a fatal error.
    + */
    +void
    +vio_ctrl_finish(struct vio_softc *sc)
    +{
    +	if (sc->sc_ctrl_inuse == RESET)
    +		return;
     
    -	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, onoff, r);
    -out:
     	vio_ctrl_wakeup(sc, FREE);
    -	return r;
     }
     
    +/* issue a VIRTIO_NET_CTRL_RX class command and wait for completion */
     int
    -vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
    +vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
     {
     	struct virtio_softc *vsc = sc->sc_virtio;
     	struct virtqueue *vq = sc->sc_ctl_vq;
     	int r, slot;
     
    -	splassert(IPL_NET);
    -
    -	if ((r = vio_wait_ctrl(sc)) != 0)
    +	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_RX, cmd, 1, &slot);
    +	if (r != 0)
     		return r;
     
    -	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
    -	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
    -	sc->sc_ctrl_guest_offloads->offloads = features;
    -
    -	r = virtio_enqueue_prep(vq, &slot);
    -	if (r != 0)
    -		panic("%s: %s virtio_enqueue_prep: control vq busy",
    -		    sc->sc_dev.dv_xname, __func__);
    -	r = virtio_enqueue_reserve(vq, slot, 3);
    -	if (r != 0)
    -		panic("%s: %s virtio_enqueue_reserve: control vq busy",
    -		    sc->sc_dev.dv_xname, __func__);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_cmd,
    -	    sizeof(*sc->sc_ctrl_cmd), 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_guest_offloads,
    -	    sizeof(*sc->sc_ctrl_guest_offloads), 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_status,
    -	    sizeof(*sc->sc_ctrl_status), 0);
    -	virtio_enqueue_commit(vsc, vq, slot, 1);
    +	sc->sc_ctrl_rx->onoff = onoff;
     
    -	if ((r = vio_wait_ctrl_done(sc)) != 0)
    -		goto out;
    +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_rx,
    +	    sizeof(*sc->sc_ctrl_rx), 1);
     
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
    -	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
    -	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE);
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
    -	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
    +	r = vio_ctrl_submit(sc, slot);
    +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_rx,
    +	    sizeof(*sc->sc_ctrl_rx), BUS_DMASYNC_POSTWRITE);
    +	if (r != 0)
    +		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname, cmd);
     
    -	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
    -		r = 0;
    -	} else {
    -		printf("%s: offload features 0x%llx failed\n",
    -		    sc->sc_dev.dv_xname, features);
    -		r = EIO;
    -	}
    +	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, onoff, r);
     
    -	DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r);
    - out:
    -	vio_ctrl_wakeup(sc, FREE);
    +	vio_ctrl_finish(sc);
     	return r;
     }
     
    +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */
     int
    -vio_wait_ctrl(struct vio_softc *sc)
    +vio_ctrl_mq(struct vio_softc *sc)
     {
    -	int r = 0;
    +	struct virtio_softc *vsc = sc->sc_virtio;
    +	struct virtqueue *vq = sc->sc_ctl_vq;
    +	int r, slot;
     
    -	while (sc->sc_ctrl_inuse != FREE) {
    -		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    -			return ENXIO;
    -		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
    -	}
    -	sc->sc_ctrl_inuse = INUSE;
     
    +	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ,
    +	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot);
    +	if (r != 0)
    +		return r;
    +
    +	sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues;
    +
    +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs,
    +	    sizeof(*sc->sc_ctrl_mq_pairs), 1);
    +
    +	r = vio_ctrl_submit(sc, slot);
    +
    +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs,
    +	    sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE);
    +
    +	if (r != 0)
    +		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname,
    +		    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET);
    +
    +	DPRINTF("%s: cmd %d %d: %d\n", __func__,
    +	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r);
    +
    +	vio_ctrl_finish(sc);
     	return r;
     }
     
     int
    -vio_wait_ctrl_done(struct vio_softc *sc)
    +vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
     {
    -	int r = 0;
    +	struct virtio_softc *vsc = sc->sc_virtio;
    +	struct virtqueue *vq = sc->sc_ctl_vq;
    +	int r, slot;
     
    -	while (sc->sc_ctrl_inuse != DONE) {
    -		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    -			return ENXIO;
    -		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
    -		    VIRTIO_NET_CTRL_TIMEOUT);
    -		if (r == EWOULDBLOCK) {
    -			printf("%s: ctrl queue timeout\n",
    -			    sc->sc_dev.dv_xname);
    -			vio_ctrl_wakeup(sc, RESET);
    -			return ENXIO;
    -		}
    +	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
    +	    VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, 1, &slot);
    +	if (r != 0)
    +		return r;
    +
    +	sc->sc_ctrl_guest_offloads->offloads = features;
    +
    +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_guest_offloads,
    +	    sizeof(*sc->sc_ctrl_guest_offloads), 1);
    +
    +	r = vio_ctrl_submit(sc, slot);
    +
    +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
    +	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE);
    +
    +	if (r != 0) {
    +		printf("%s: offload features 0x%llx failed\n",
    +		    sc->sc_dev.dv_xname, features);
     	}
    +
    +	DPRINTF("%s: offload features 0x%llx: %d\n", __func__, features, r);
    +
    +	vio_ctrl_finish(sc);
     	return r;
     }
     
    @@ -1642,18 +1959,23 @@ vio_ctrleof(struct virtqueue *vq)
     {
     	struct virtio_softc *vsc = vq->vq_owner;
     	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    -	int r = 0, ret, slot;
    +	int r = 0, ret, slot, s;
     
    +	KERNEL_LOCK();
    +	s = splnet();
     again:
     	ret = virtio_dequeue(vsc, vq, &slot, NULL);
     	if (ret == ENOENT)
    -		return r;
    +		goto out;
     	virtio_dequeue_commit(vq, slot);
     	r++;
     	vio_ctrl_wakeup(sc, DONE);
     	if (virtio_start_vq_intr(vsc, vq))
     		goto again;
     
    +out:
    +	splx(s);
    +	KERNEL_UNLOCK();
     	return r;
     }
     
    @@ -1665,55 +1987,35 @@ vio_set_rx_filter(struct vio_softc *sc)
     	struct virtio_softc *vsc = sc->sc_virtio;
     	struct virtqueue *vq = sc->sc_ctl_vq;
     	int r, slot;
    +	size_t len_uc, len_mc;
     
    -	splassert(IPL_NET);
    -
    -	if ((r = vio_wait_ctrl(sc)) != 0)
    -		return r;
    -
    -	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
    -	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
     
    -	r = virtio_enqueue_prep(vq, &slot);
    +	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MAC,
    +	    VIRTIO_NET_CTRL_MAC_TABLE_SET, 2, &slot);
     	if (r != 0)
    -		panic("%s: %s virtio_enqueue_prep: control vq busy",
    -		    sc->sc_dev.dv_xname, __func__);
    -	r = virtio_enqueue_reserve(vq, slot, 4);
    -	if (r != 0)
    -		panic("%s: %s virtio_enqueue_reserve: control vq busy",
    -		    sc->sc_dev.dv_xname, __func__);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_cmd,
    -	    sizeof(*sc->sc_ctrl_cmd), 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mac_tbl_uc,
    -	    sizeof(*sc->sc_ctrl_mac_tbl_uc) +
    -	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN, 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mac_tbl_mc,
    -	    sizeof(*sc->sc_ctrl_mac_tbl_mc) +
    -	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
    -	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_status,
    -	    sizeof(*sc->sc_ctrl_status), 0);
    -	virtio_enqueue_commit(vsc, vq, slot, 1);
    -
    -	if ((r = vio_wait_ctrl_done(sc)) != 0)
    -		goto out;
    -
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
    -	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_info,
    -	    VIO_CTRL_MAC_INFO_SIZE, BUS_DMASYNC_POSTWRITE);
    -	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
    -	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
    +		return r;
     
    -	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
    -		r = 0;
    -	} else {
    +	len_uc = sizeof(*sc->sc_ctrl_mac_tbl_uc) +
    +	    sc->sc_ctrl_mac_tbl_uc->nentries * ETHER_ADDR_LEN;
    +	len_mc = sizeof(*sc->sc_ctrl_mac_tbl_mc) +
    +	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN;
    +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mac_tbl_uc, len_uc,
    +	    1);
    +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mac_tbl_mc, len_mc,
    +	    1);
    +
    +	r = vio_ctrl_submit(sc, slot);
    +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_tbl_uc, len_uc,
    +	    BUS_DMASYNC_POSTWRITE);
    +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mac_tbl_mc, len_mc,
    +	    BUS_DMASYNC_POSTWRITE);
    +
    +	if (r != 0) {
     		/* The host's filter table is not large enough */
     		printf("%s: failed setting rx filter\n", sc->sc_dev.dv_xname);
    -		r = EIO;
     	}
     
    -out:
    -	vio_ctrl_wakeup(sc, FREE);
    +	vio_ctrl_finish(sc);
     	return r;
     }
     
    diff --git a/sys/dev/pv/virtio.c b/sys/dev/pv/virtio.c
    index 6d9fe06d645..227d31c24f4 100644
    --- a/sys/dev/pv/virtio.c
    +++ b/sys/dev/pv/virtio.c
    @@ -165,9 +165,9 @@ virtio_reinit_start(struct virtio_softc *sc)
     	for (i = 0; i < sc->sc_nvqs; i++) {
     		int n;
     		struct virtqueue *vq = &sc->sc_vqs[i];
    -		n = virtio_read_queue_size(sc, vq->vq_index);
    -		if (n == 0)	/* vq disappeared */
    +		if (vq->vq_num == 0)	/* not used */
     			continue;
    +		n = virtio_read_queue_size(sc, vq->vq_index);
     		if (n != vq->vq_num) {
     			panic("%s: virtqueue size changed, vq index %d",
     			    sc->sc_dev.dv_xname, vq->vq_index);
    @@ -255,8 +255,11 @@ virtio_check_vqs(struct virtio_softc *sc)
     	int i, r = 0;
     
     	/* going backwards is better for if_vio */
    -	for (i = sc->sc_nvqs - 1; i >= 0; i--)
    +	for (i = sc->sc_nvqs - 1; i >= 0; i--) {
    +		if (sc->sc_vqs[i].vq_num == 0)	/* not used */
    +			continue;
     		r |= virtio_check_vq(sc, &sc->sc_vqs[i]);
    +	}
     
     	return r;
     }
    @@ -450,6 +453,11 @@ virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
     	struct vq_entry *qe;
     	int i = 0;
     
    +	if (vq->vq_num == 0) {
    +		/* virtio_alloc_vq() was never called */
    +		return 0;
    +	}
    +
     	/* device must be already deactivated */
     	/* confirm the vq is empty */
     	SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) {
    @@ -848,22 +856,25 @@ virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
      *
      *                 Don't call this if you use statically allocated slots
      *                 and virtio_enqueue_trim().
    + *
    + *                 returns the number of freed slots.
      */
     int
     virtio_dequeue_commit(struct virtqueue *vq, int slot)
     {
     	struct vq_entry *qe = &vq->vq_entries[slot];
     	struct vring_desc *vd = &vq->vq_desc[0];
    -	int s = slot;
    +	int s = slot, r = 1;
     
     	while (vd[s].flags & VRING_DESC_F_NEXT) {
     		s = vd[s].next;
     		vq_free_entry(vq, qe);
     		qe = &vq->vq_entries[s];
    +		r++;
     	}
     	vq_free_entry(vq, qe);
     
    -	return 0;
    +	return r;
     }
     
     /*
    diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
    index 63a4eb4b14c..d1820c74737 100644
    --- a/sys/dev/pv/virtiovar.h
    +++ b/sys/dev/pv/virtiovar.h
    @@ -103,7 +103,8 @@ struct vq_entry {
     
     struct virtqueue {
     	struct virtio_softc	*vq_owner;
    -	unsigned int		vq_num;  /* queue size (# of entries) */
    +	unsigned int		vq_num;  /* queue size (# of entries),
     +					  * 0 if unused/non-existent */
     	unsigned int		vq_mask; /* (1 << vq_num - 1) */
     	int			vq_index; /* queue number (0, 1, ...) */
     
    @@ -162,6 +163,8 @@ struct virtio_ops {
     	void		(*set_status)(struct virtio_softc *, int);
     	int		(*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
     	int		(*poll_intr)(void *);
    +	int		(*intr_establish)(struct virtio_softc *, struct virtio_attach_args *,
    +			    int, struct cpu_info *, int (*)(void *), void *);
     };
     
     #define VIRTIO_CHILD_ERROR	((void*)1)
    @@ -178,7 +181,7 @@ struct virtio_softc {
     	int			 sc_indirect;
     	int			 sc_version_1;
     
    -	int			 sc_nvqs;	/* set by child */
    +	int			 sc_nvqs;	/* size of sc_vqs, set by child */
     	struct virtqueue	*sc_vqs;	/* set by child */
     
     	struct device		*sc_child;	/* set by child,
    @@ -204,6 +207,14 @@ struct virtio_softc {
     #define	virtio_get_status(sc)			(sc)->sc_ops->get_status(sc)
     #define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
     
    +/*
    + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a
    + * child driver, the transport driver will skip automatic intr allocation and
    + * the child driver must allocate all required interrupts itself. Vector 0 is
    + * always used for the config change interrupt.
    + */
    +#define	virtio_intr_establish(sc, va, v, ci, fn, a)	(sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a)
    +
     /* only for transport drivers */
     #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)
     
    
    
    
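
    A note on the reworked control queue helpers, since every command now
    follows the same pattern: vio_ctrl_start() locks the queue and enqueues
    the common command header, the caller enqueues its command-specific
    payload, vio_ctrl_submit() waits for and checks the result, and
    vio_ctrl_finish() unlocks. A hypothetical caller would look roughly
    like this (vsc is sc->sc_virtio; sc_ctrl_foo stands in for one of the
    sc_ctrl_* payload buffers; vio_ctrl_rx() in the diff is a real
    example):

    	r = vio_ctrl_start(sc, class, cmd, 1, &slot);
    	if (r != 0)
    		return r;
    	/* fill in and enqueue the command-specific payload */
    	sc->sc_ctrl_foo->field = value;
    	vio_dmamem_enqueue(vsc, sc, sc->sc_ctl_vq, slot, sc->sc_ctrl_foo,
    	    sizeof(*sc->sc_ctrl_foo), 1);
    	r = vio_ctrl_submit(sc, slot);
    	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_foo, sizeof(*sc->sc_ctrl_foo),
    	    BUS_DMASYNC_POSTWRITE);
    	vio_ctrl_finish(sc);
    	return r;

    If a caller needs to bail out between start and submit, it calls
    virtio_enqueue_abort() instead and then vio_ctrl_finish(), as described
    in the comment above vio_ctrl_start().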