From: Alexander Bluhm <bluhm@openbsd.org>
Subject: Re: vio: Enable multiqueue
To: Stefan Fritsch <sf@openbsd.org>
Cc: tech@openbsd.org
Date: Mon, 13 Jan 2025 22:35:27 +0100

    On Tue, Jan 07, 2025 at 09:26:55AM +0100, Stefan Fritsch wrote:
    > Hi,
    > 
    > this diff finally enables multiqueue for vio(4). It goes on top of the 
    > "virtio: Support unused virtqueues" diff from my previous mail.
    > 
    > The distribution of packets to the enabled queues is not optimal. To 
    > improve this, one would need the optional RSS (receive-side scaling) 
    > feature which is difficult to configure with libvirt/qemu and therefore 
    > usually not available on hypervisors. Things may improve with future 
    > libvirt versions. RSS support is not included in this diff. But even 
    > without RSS, we have seen some nice performance gains.
    >     
    > We use a single interrupt vector for every rx/tx queue pair. With config
    > and control queue vectors, we need N+2 vectors for N queues.  If
    > multi-queue is not available, the old scheme is used with either one
    > vector per virtqueue or one vector for all queues.
    >     
    > * virtio: Add API to establish interrupts on specific cpus in child
    >   drivers. Also make virtio_pci_setup_msix return proper errno.
    >     
    > * virtio_pci: Increase max number of MSIX vectors
    >     
    > * vio: Configure multiple queues and allocate proper interrupts.
    > 
    > 
    > I am not entirely happy with the API for establishing interrupts. But 
    > there are several variants of how interrupts need to be handled (virtio pci 
    > with or without MSIX, virtio mmio) and I want to keep those details from 
    > the child drivers as far as possible. The way I have implemented it, only 
    > the child drivers that need to allocate per-cpu interrupts need to deal 
    > with the new API. If anyone has a better idea, I would be interested to 
    > hear it.
    > 
    > comments? ok?
    
    I have tested it on KVM, on vmd, and with SEV bounce buffers.
    
    With parallel send and receive of UDP packets it seems to be faster.
    But there is a lot of variation in the tests.  TCP has the bottleneck
    somewhere else; there is no significant difference.
    
    OK bluhm@
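
    For reference, the interrupt vector layout described above works out to
    the following for N rx/tx queue pairs (sketch only, restating the
    if_vio.c hunks below):

        vec 0    config change interrupt   (vio_config_intr)
        vec 1    control queue             (vio_ctrl_intr)
        vec 2+i  rx/tx queue pair i        (vio_queue_intr, pinned to the
                                            cpu chosen by intrmap_cpu())

    In the child driver this boils down to roughly the following, error
    handling omitted:

        virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr, vsc);
        virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr, sc->sc_ctl_vq);
        for (i = 0; i < sc->sc_nqueues; i++) {
                struct cpu_info *ci = intrmap_cpu(sc->sc_intrmap, i);
                virtio_intr_establish(vsc, va, i + 2, ci,
                    vio_queue_intr, &sc->sc_q[i]);
        }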
    
    > diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
    > index da7f2c3bea3..3ca631dcca0 100644
    > --- a/sys/dev/fdt/virtio_mmio.c
    > +++ b/sys/dev/fdt/virtio_mmio.c
    > @@ -105,6 +105,8 @@ int		virtio_mmio_negotiate_features(struct virtio_softc *,
    >      const struct virtio_feature_name *);
    >  int		virtio_mmio_intr(void *);
    >  void		virtio_mmio_intr_barrier(struct virtio_softc *);
    > +int		virtio_mmio_intr_establish(struct virtio_softc *, struct virtio_attach_args *,
    > +    int, struct cpu_info *, int (*)(void *), void *);
    >  
    >  struct virtio_mmio_softc {
    >  	struct virtio_softc	sc_sc;
    > @@ -160,6 +162,7 @@ const struct virtio_ops virtio_mmio_ops = {
    >  	virtio_mmio_attach_finish,
    >  	virtio_mmio_intr,
    >  	virtio_mmio_intr_barrier,
    > +	virtio_mmio_intr_establish,
    >  };
    >  
    >  uint16_t
    > @@ -546,3 +549,11 @@ virtio_mmio_intr_barrier(struct virtio_softc *vsc)
    >  	if (sc->sc_ih)
    >  		intr_barrier(sc->sc_ih);
    >  }
    > +
    > +int
    > +virtio_mmio_intr_establish(struct virtio_softc *vsc,
    > +    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
    > +    int (*func)(void *), void *arg)
    > +{
    > +	return ENXIO;
    > +}
    > diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
    > index 8463f6223de..5f91d0ebe77 100644
    > --- a/sys/dev/pci/virtio_pci.c
    > +++ b/sys/dev/pci/virtio_pci.c
    > @@ -50,7 +50,7 @@
    >   * XXX: PCI-endian while the device specific registers are native endian.
    >   */
    >  
    > -#define MAX_MSIX_VECS	8
    > +#define MAX_MSIX_VECS	16
    >  
    >  struct virtio_pci_softc;
    >  struct virtio_pci_attach_args;
    > @@ -62,7 +62,7 @@ int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *p
    >  int		virtio_pci_detach(struct device *, int);
    >  
    >  void		virtio_pci_kick(struct virtio_softc *, uint16_t);
    > -int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
    > +int		virtio_pci_adjust_config_region(struct virtio_pci_softc *, int offset);
    >  uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
    >  uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
    >  uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
    > @@ -81,9 +81,10 @@ int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_fe
    >  int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
    >  void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
    >  void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
    > -int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, int (*)(void *), void *);
    > +int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
    >  int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct virtio_pci_attach_args *, int);
    >  void		virtio_pci_intr_barrier(struct virtio_softc *);
    > +int		virtio_pci_intr_establish(struct virtio_softc *, struct virtio_attach_args *, int, struct cpu_info *, int (*)(void *), void *);
    >  void		virtio_pci_free_irqs(struct virtio_pci_softc *);
    >  int		virtio_pci_poll_intr(void *);
    >  int		virtio_pci_legacy_intr(void *);
    > @@ -100,6 +101,7 @@ enum irq_type {
    >  	IRQ_NO_MSIX,
    >  	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
    >  	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
    > +	IRQ_MSIX_CHILD,  /* assigned by child driver */
    >  };
    >  
    >  struct virtio_pci_intr {
    > @@ -179,6 +181,7 @@ const struct virtio_ops virtio_pci_ops = {
    >  	virtio_pci_attach_finish,
    >  	virtio_pci_poll_intr,
    >  	virtio_pci_intr_barrier,
    > +	virtio_pci_intr_establish,
    >  };
    >  
    >  static inline uint64_t
    > @@ -648,10 +651,12 @@ virtio_pci_attach(struct device *parent, struct device *self, void *aux)
    >  		goto free;
    >  	}
    >  
    > -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
    >  	sc->sc_irq_type = IRQ_NO_MSIX;
    > -	if (virtio_pci_adjust_config_region(sc) != 0)
    > -		goto err;
    > +	if (virtio_pci_adjust_config_region(sc,
    > +	    VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI) != 0)
    > +	{
    > +		goto free;
    > +	}
    >  
    >  	virtio_device_reset(vsc);
    >  	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
    > @@ -692,7 +697,9 @@ virtio_pci_attach_finish(struct virtio_softc *vsc,
    >  	pci_chipset_tag_t pc = vpa->vpa_pa->pa_pc;
    >  	char const *intrstr;
    >  
    > -	if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
    > +	if (sc->sc_irq_type == IRQ_MSIX_CHILD) {
    > +		intrstr = "msix";
    > +	} else if (virtio_pci_setup_msix(sc, vpa, 0) == 0) {
    >  		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
    >  		intrstr = "msix per-VQ";
    >  	} else if (virtio_pci_setup_msix(sc, vpa, 1) == 0) {
    > @@ -754,11 +761,14 @@ virtio_pci_detach(struct device *self, int flags)
    >  }
    >  
    >  int
    > -virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
    > +virtio_pci_adjust_config_region(struct virtio_pci_softc *sc, int offset)
    >  {
    >  	if (sc->sc_sc.sc_version_1)
    >  		return 0;
    > -	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
    > +	if (sc->sc_devcfg_offset == offset)
    > +		return 0;
    > +	sc->sc_devcfg_offset = offset;
    > +	sc->sc_devcfg_iosize = sc->sc_iosize - offset;
    >  	sc->sc_devcfg_iot = sc->sc_iot;
    >  	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
    >  	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
    > @@ -958,30 +968,33 @@ virtio_pci_write_device_config_8(struct virtio_softc *vsc,
    >  
    >  int
    >  virtio_pci_msix_establish(struct virtio_pci_softc *sc,
    > -    struct virtio_pci_attach_args *vpa, int idx,
    > +    struct virtio_pci_attach_args *vpa, int idx, struct cpu_info *ci,
    >      int (*handler)(void *), void *ih_arg)
    >  {
    >  	struct virtio_softc *vsc = &sc->sc_sc;
    >  	pci_intr_handle_t ih;
    > +	int r;
    >  
    >  	KASSERT(idx < sc->sc_nintr);
    >  
    > -	if (pci_intr_map_msix(vpa->vpa_pa, idx, &ih) != 0) {
    > +	r = pci_intr_map_msix(vpa->vpa_pa, idx, &ih);
    > +	if (r != 0) {
    >  #if VIRTIO_DEBUG
    >  		printf("%s[%d]: pci_intr_map_msix failed\n",
    >  		    vsc->sc_dev.dv_xname, idx);
    >  #endif
    > -		return 1;
    > +		return r;
    >  	}
    >  	snprintf(sc->sc_intr[idx].name, sizeof(sc->sc_intr[idx].name), "%s:%d",
    >  	    vsc->sc_child->dv_xname, idx);
    > -	sc->sc_intr[idx].ih = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
    > -	    handler, ih_arg, sc->sc_intr[idx].name);
    > +	sc->sc_intr[idx].ih = pci_intr_establish_cpu(sc->sc_pc, ih, vsc->sc_ipl,
    > +	    ci, handler, ih_arg, sc->sc_intr[idx].name);
    >  	if (sc->sc_intr[idx].ih == NULL) {
    >  		printf("%s[%d]: couldn't establish msix interrupt\n",
    > -		    vsc->sc_dev.dv_xname, idx);
    > -		return 1;
    > +		    vsc->sc_child->dv_xname, idx);
    > +		return ENOMEM;
    >  	}
    > +	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_MSI);
    >  	return 0;
    >  }
    >  
    > @@ -1031,8 +1044,8 @@ virtio_pci_free_irqs(struct virtio_pci_softc *sc)
    >  		}
    >  	}
    >  
    > -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
    > -	virtio_pci_adjust_config_region(sc);
    > +	/* XXX msix_delroute does not unset PCI_MSIX_MC_MSIXE -> leave alone? */
    > +	virtio_pci_adjust_config_region(sc, VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI);
    >  }
    >  
    >  int
    > @@ -1040,34 +1053,33 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
    >      struct virtio_pci_attach_args *vpa, int shared)
    >  {
    >  	struct virtio_softc *vsc = &sc->sc_sc;
    > -	int i;
    > +	int i, r = 0;
    >  
    >  	/* Shared needs config + queue */
    >  	if (shared && vpa->vpa_va.va_nintr < 1 + 1)
    > -		return 1;
    > +		return ERANGE;
    >  	/* Per VQ needs config + N * queue */
    >  	if (!shared && vpa->vpa_va.va_nintr < 1 + vsc->sc_nvqs)
    > -		return 1;
    > +		return ERANGE;
    >  
    > -	if (virtio_pci_msix_establish(sc, vpa, 0, virtio_pci_config_intr, vsc))
    > -		return 1;
    > -	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
    > -	virtio_pci_adjust_config_region(sc);
    > +	r = virtio_pci_msix_establish(sc, vpa, 0, NULL, virtio_pci_config_intr, vsc);
    > +	if (r != 0)
    > +		return r;
    >  
    >  	if (shared) {
    > -		if (virtio_pci_msix_establish(sc, vpa, 1,
    > -		    virtio_pci_shared_queue_intr, vsc)) {
    > +		r = virtio_pci_msix_establish(sc, vpa, 1, NULL,
    > +		    virtio_pci_shared_queue_intr, vsc);
    > +		if (r != 0)
    >  			goto fail;
    > -		}
    >  
    >  		for (i = 0; i < vsc->sc_nvqs; i++)
    >  			vsc->sc_vqs[i].vq_intr_vec = 1;
    >  	} else {
    >  		for (i = 0; i < vsc->sc_nvqs; i++) {
    > -			if (virtio_pci_msix_establish(sc, vpa, i + 1,
    > -			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
    > +			r = virtio_pci_msix_establish(sc, vpa, i + 1, NULL,
    > +			    virtio_pci_queue_intr, &vsc->sc_vqs[i]);
    > +			if (r != 0)
    >  				goto fail;
    > -			}
    >  			vsc->sc_vqs[i].vq_intr_vec = i + 1;
    >  		}
    >  	}
    > @@ -1075,7 +1087,28 @@ virtio_pci_setup_msix(struct virtio_pci_softc *sc,
    >  	return 0;
    >  fail:
    >  	virtio_pci_free_irqs(sc);
    > -	return 1;
    > +	return r;
    > +}
    > +
    > +int
    > +virtio_pci_intr_establish(struct virtio_softc *vsc,
    > +    struct virtio_attach_args *va, int vec, struct cpu_info *ci,
    > +    int (*func)(void *), void *arg)
    > +{
    > +	struct virtio_pci_attach_args *vpa;
    > +	struct virtio_pci_softc *sc;
    > +
    > +	if (vsc->sc_ops != &virtio_pci_ops)
    > +		return ENXIO;
    > +
    > +	vpa = (struct virtio_pci_attach_args *)va;
    > +	sc = (struct virtio_pci_softc *)vsc;
    > +
    > +	if (vec >= sc->sc_nintr || sc->sc_nintr <= 1)
    > +		return ERANGE;
    > +
    > +	sc->sc_irq_type = IRQ_MSIX_CHILD;
    > +	return virtio_pci_msix_establish(sc, vpa, vec, ci, func, arg);
    >  }
    >  
    >  void
    > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
    > index a728940e314..20d1bfc1ca8 100644
    > --- a/sys/dev/pv/if_vio.c
    > +++ b/sys/dev/pv/if_vio.c
    > @@ -32,8 +32,10 @@
    >  #include <sys/param.h>
    >  #include <sys/systm.h>
    >  #include <sys/device.h>
    > +#include <sys/intrmap.h>
    >  #include <sys/mbuf.h>
    >  #include <sys/mutex.h>
    > +#include <sys/percpu.h>	/* for CACHELINESIZE */
    >  #include <sys/sockio.h>
    >  #include <sys/timeout.h>
    >  
    > @@ -64,8 +66,15 @@
    >   * if_vioreg.h:
    >   */
    >  /* Configuration registers */
    > -#define VIRTIO_NET_CONFIG_MAC		0 /* 8bit x 6byte */
    > -#define VIRTIO_NET_CONFIG_STATUS	6 /* 16bit */
    > +#define VIRTIO_NET_CONFIG_MAC		 0 /*  8 bit x 6 byte */
    > +#define VIRTIO_NET_CONFIG_STATUS	 6 /* 16 bit */
    > +#define VIRTIO_NET_CONFIG_MAX_QUEUES	 8 /* 16 bit */
    > +#define VIRTIO_NET_CONFIG_MTU		10 /* 16 bit */
    > +#define VIRTIO_NET_CONFIG_SPEED		12 /* 32 bit */
    > +#define VIRTIO_NET_CONFIG_DUPLEX	16 /*  8 bit */
    > +#define VIRTIO_NET_CONFIG_RSS_SIZE	17 /*  8 bit */
    > +#define VIRTIO_NET_CONFIG_RSS_LEN	18 /* 16 bit */
    > +#define VIRTIO_NET_CONFIG_HASH_TYPES	20 /* 16 bit */
    >  
    >  /* Feature bits */
    >  #define VIRTIO_NET_F_CSUM			(1ULL<<0)
    > @@ -183,6 +192,11 @@ struct virtio_net_ctrl_cmd {
    >  # define VIRTIO_NET_CTRL_VLAN_ADD	0
    >  # define VIRTIO_NET_CTRL_VLAN_DEL	1
    >  
    > +#define VIRTIO_NET_CTRL_MQ		4
    > +# define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET	0
    > +# define VIRTIO_NET_CTRL_MQ_RSS_CONFIG		1
    > +# define VIRTIO_NET_CTRL_MQ_HASH_CONFIG		2
    > +
    >  #define VIRTIO_NET_CTRL_GUEST_OFFLOADS	5
    >  # define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0
    >  
    > @@ -196,6 +210,12 @@ struct virtio_net_ctrl_rx {
    >  	uint8_t	onoff;
    >  } __packed;
    >  
    > +struct virtio_net_ctrl_mq_pairs_set {
    > +	uint16_t virtqueue_pairs;
    > +};
    > +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN	1
    > +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX	0x8000
    > +
    >  struct virtio_net_ctrl_guest_offloads {
    >  	uint64_t offloads;
    >  } __packed;
    > @@ -231,7 +251,7 @@ struct vio_queue {
    >  	struct virtqueue	 *viq_txvq;
    >  	struct mutex		  viq_txmtx, viq_rxmtx;
    >  	int			  viq_txfree_slots;
    > -};
    > +} __aligned(CACHELINESIZE);
    >  
    >  struct vio_softc {
    >  	struct device		sc_dev;
    > @@ -251,14 +271,16 @@ struct vio_softc {
    >  	caddr_t			sc_dma_kva;
    >  
    >  	int			sc_hdr_size;
    > -	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
    > -	struct virtio_net_ctrl_status *sc_ctrl_status;
    > -	struct virtio_net_ctrl_rx *sc_ctrl_rx;
    > -	struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
    > -	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
    > +	struct virtio_net_ctrl_cmd		*sc_ctrl_cmd;
    > +	struct virtio_net_ctrl_status		*sc_ctrl_status;
    > +	struct virtio_net_ctrl_rx		*sc_ctrl_rx;
    > +	struct virtio_net_ctrl_mq_pairs_set	*sc_ctrl_mq_pairs;
    > +	struct virtio_net_ctrl_guest_offloads	*sc_ctrl_guest_offloads;
    > +	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_uc;
    >  #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
    > -	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
    > +	struct virtio_net_ctrl_mac_tbl		*sc_ctrl_mac_tbl_mc;
    >  
    > +	struct intrmap		*sc_intrmap;
    >  	struct vio_queue	*sc_q;
    >  	uint16_t		sc_nqueues;
    >  	int			sc_tx_slots_per_req;
    > @@ -317,10 +339,15 @@ void	vio_tx_drain(struct vio_softc *);
    >  int	vio_encap(struct vio_queue *, int, struct mbuf *);
    >  void	vio_txtick(void *);
    >  
    > +int	vio_queue_intr(void *);
    > +int	vio_config_intr(void *);
    > +int	vio_ctrl_intr(void *);
    > +
    >  /* other control */
    >  void	vio_link_state(struct ifnet *);
    >  int	vio_config_change(struct virtio_softc *);
    >  int	vio_ctrl_rx(struct vio_softc *, int, int);
    > +int	vio_ctrl_mq(struct vio_softc *);
    >  int	vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
    >  int	vio_set_rx_filter(struct vio_softc *);
    >  void	vio_iff(struct vio_softc *);
    > @@ -408,6 +435,8 @@ vio_free_dmamem(struct vio_softc *sc)
    >   *   sc_ctrl_status:	 return value for a command via ctrl vq (READ)
    >   *   sc_ctrl_rx:	 parameter for a VIRTIO_NET_CTRL_RX class command
    >   *			 (WRITE)
    > + *   sc_ctrl_mq_pairs_set: set number of rx/tx queue pairs (WRITE)
    > + *   sc_ctrl_guest_offloads: configure offload features (WRITE)
    >   *   sc_ctrl_mac_tbl_uc: unicast MAC address filter for a VIRTIO_NET_CTRL_MAC
    >   *			 class command (WRITE)
    >   *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
    > @@ -449,6 +478,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
    >  		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
    >  		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
    >  		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
    > +		allocsize += sizeof(struct virtio_net_ctrl_mq_pairs_set) * 1;
    >  		allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
    >  		allocsize += VIO_CTRL_MAC_INFO_SIZE;
    >  	}
    > @@ -474,6 +504,8 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
    >  		offset += sizeof(*sc->sc_ctrl_status);
    >  		sc->sc_ctrl_rx = (void *)(kva + offset);
    >  		offset += sizeof(*sc->sc_ctrl_rx);
    > +		sc->sc_ctrl_mq_pairs = (void *)(kva + offset);
    > +		offset += sizeof(*sc->sc_ctrl_mq_pairs);
    >  		sc->sc_ctrl_guest_offloads = (void *)(kva + offset);
    >  		offset += sizeof(*sc->sc_ctrl_guest_offloads);
    >  		sc->sc_ctrl_mac_tbl_uc = (void *)(kva + offset);
    > @@ -598,7 +630,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
    >  	struct vio_softc *sc = (struct vio_softc *)self;
    >  	struct virtio_softc *vsc = (struct virtio_softc *)parent;
    >  	struct virtio_attach_args *va = aux;
    > -	int i, tx_max_segments;
    > +	int i, r, tx_max_segments;
    >  	struct ifnet *ifp = &sc->sc_ac.ac_if;
    >  
    >  	if (vsc->sc_child != NULL) {
    > @@ -616,6 +648,9 @@ vio_attach(struct device *parent, struct device *self, void *aux)
    >  	    VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
    >  	    VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM;
    >  
    > +	if (va->va_nintr > 3 && ncpus > 1)
    > +		vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
    > +
    >  	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
    >  	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
    >  
    > @@ -626,10 +661,23 @@ vio_attach(struct device *parent, struct device *self, void *aux)
    >  	if (virtio_negotiate_features(vsc, virtio_net_feature_names) != 0)
    >  		goto err;
    >  
    > -	sc->sc_nqueues = 1;
    > -	vsc->sc_nvqs = 2 * sc->sc_nqueues;
    > -	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    > -		vsc->sc_nvqs++;
    > +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    > +		i = virtio_read_device_config_2(vsc,
    > +		    VIRTIO_NET_CONFIG_MAX_QUEUES);
    > +		vsc->sc_nvqs = 2 * i + 1;
    > +		i = MIN(i, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX);
    > +		sc->sc_intrmap = intrmap_create(&sc->sc_dev, i,
    > +		    va->va_nintr - 2, 0);
    > +		sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
    > +		printf(": %u queue%s", sc->sc_nqueues,
    > +		    sc->sc_nqueues > 1 ? "s"  : "");
    > +	} else {
    > +		sc->sc_nqueues = 1;
    > +		printf(": 1 queue");
    > +		vsc->sc_nvqs = 2;
    > +		if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    > +			vsc->sc_nvqs++;
    > +	}
    >  
    >  	vsc->sc_vqs = mallocarray(vsc->sc_nvqs, sizeof(*vsc->sc_vqs), M_DEVBUF,
    >  	    M_WAITOK|M_ZERO);
    > @@ -729,18 +777,66 @@ vio_attach(struct device *parent, struct device *self, void *aux)
    >  		else
    >  			virtio_stop_vq_intr(vsc, vioq->viq_txvq);
    >  		vioq->viq_txfree_slots = vioq->viq_txvq->vq_num - 1;
    > +		KASSERT(vioq->viq_txfree_slots > sc->sc_tx_slots_per_req);
    > +		if (vioq->viq_txvq->vq_num != sc->sc_q[0].viq_txvq->vq_num) {
    > +			printf("unequal tx queue size %d: %d != %d\n", i,
    > +			    vioq->viq_txvq->vq_num,
    > +			    sc->sc_q[0].viq_txvq->vq_num);
    > +			goto err;
    > +		}
    > +		DPRINTF("%d: q %p rx %p tx %p\n", i, vioq, vioq->viq_rxvq,
    > +		    vioq->viq_txvq);
    > +
    > +		if (sc->sc_intrmap != NULL) {
    > +			vioq->viq_rxvq->vq_intr_vec = i + 2;
    > +			vioq->viq_txvq->vq_intr_vec = i + 2;
    > +		}
    >  	}
    >  
    >  	/* control queue */
    >  	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ)) {
    > -		sc->sc_ctl_vq = &vsc->sc_vqs[2];
    > -		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, 2, 1,
    > -		    "control") != 0)
    > +		i = 2;
    > +		if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    > +			i = 2 * virtio_read_device_config_2(vsc,
    > +			    VIRTIO_NET_CONFIG_MAX_QUEUES);
    > +		}
    > +		sc->sc_ctl_vq =  &vsc->sc_vqs[i];
    > +		if (virtio_alloc_vq(vsc, sc->sc_ctl_vq, i, 1, "control") != 0)
    >  			goto err;
    >  		sc->sc_ctl_vq->vq_done = vio_ctrleof;
    > +		if (sc->sc_intrmap != NULL)
    > +			sc->sc_ctl_vq->vq_intr_vec = 1;
    >  		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
    >  	}
    >  
    > +	if (sc->sc_intrmap) {
    > +		r = virtio_intr_establish(vsc, va, 0, NULL, vio_config_intr,
    > +		    vsc);
    > +		if (r != 0) {
    > +			printf("%s: cannot alloc config intr: %d\n",
    > +			    sc->sc_dev.dv_xname, r);
    > +			goto err;
    > +		}
    > +		r = virtio_intr_establish(vsc, va, 1, NULL, vio_ctrl_intr,
    > +		    sc->sc_ctl_vq);
    > +		if (r != 0) {
    > +			printf("%s: cannot alloc ctrl intr: %d\n",
    > +			    sc->sc_dev.dv_xname, r);
    > +			goto err;
    > +		}
    > +		for (i = 0; i < sc->sc_nqueues; i++) {
    > +			struct cpu_info *ci = NULL;
    > +			ci = intrmap_cpu(sc->sc_intrmap, i);
    > +			r = virtio_intr_establish(vsc, va, i + 2, ci,
    > +			    vio_queue_intr, &sc->sc_q[i]);
    > +			if (r != 0) {
    > +				printf("%s: cannot alloc q%d intr: %d\n",
    > +				    sc->sc_dev.dv_xname, i, r);
    > +				goto err;
    > +			}
    > +		}
    > +	}
    > +
    >  	if (vio_alloc_mem(sc, tx_max_segments) < 0)
    >  		goto err;
    >  
    > @@ -760,6 +856,11 @@ vio_attach(struct device *parent, struct device *self, void *aux)
    >  	if (virtio_attach_finish(vsc, va) != 0)
    >  		goto err;
    >  
    > +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
    > +		/* ctrl queue works only after DRIVER_OK */
    > +		vio_ctrl_mq(sc);
    > +	}
    > +
    >  	if_attach(ifp);
    >  	ether_ifattach(ifp);
    >  	vio_link_state(ifp);
    > @@ -805,6 +906,33 @@ vio_link_state(struct ifnet *ifp)
    >  	}
    >  }
    >  
    > +/* interrupt handlers for multi-queue */
    > +int
    > +vio_queue_intr(void *arg)
    > +{
    > +	struct vio_queue *vioq = arg;
    > +	struct virtio_softc *vsc = vioq->viq_sc->sc_virtio;
    > +	int r;
    > +	r = virtio_check_vq(vsc, vioq->viq_txvq);
    > +	r |= virtio_check_vq(vsc, vioq->viq_rxvq);
    > +	return r;
    > +}
    > +
    > +int
    > +vio_config_intr(void *arg)
    > +{
    > +	struct virtio_softc *vsc = arg;
    > +	return vio_config_change(vsc);
    > +}
    > +
    > +int
    > +vio_ctrl_intr(void *arg)
    > +{
    > +	struct virtqueue *vq = arg;
    > +	return virtio_check_vq(vq->vq_owner, vq);
    > +}
    > +
    > +
    >  int
    >  vio_config_change(struct virtio_softc *vsc)
    >  {
    > @@ -913,6 +1041,8 @@ vio_stop(struct ifnet *ifp, int disable)
    >  	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    >  		virtio_start_vq_intr(vsc, sc->sc_ctl_vq);
    >  	virtio_reinit_end(vsc);
    > +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ))
    > +		vio_ctrl_mq(sc);
    >  	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_VQ))
    >  		vio_ctrl_wakeup(sc, FREE);
    >  }
    > @@ -1137,6 +1267,33 @@ vio_dump(struct vio_softc *sc)
    >  }
    >  #endif
    >  
    > +static int
    > +vio_rxr_info(struct vio_softc *sc, struct if_rxrinfo *ifri)
    > +{
    > +	struct if_rxring_info *ifrs, *ifr;
    > +	int error;
    > +	unsigned int i;
    > +
    > +	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
    > +	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
    > +	if (ifrs == NULL)
    > +		return (ENOMEM);
    > +
    > +	for (i = 0; i < sc->sc_nqueues; i++) {
    > +		ifr = &ifrs[i];
    > +
    > +		ifr->ifr_size = sc->sc_rx_mbuf_size;
    > +		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
    > +		ifr->ifr_info = sc->sc_q[i].viq_rxring;
    > +	}
    > +
    > +	error = if_rxr_info_ioctl(ifri, i, ifrs);
    > +
    > +	free(ifrs, M_TEMP, i * sizeof(*ifrs));
    > +
    > +	return (error);
    > +}
    > +
    >  int
    >  vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
    >  {
    > @@ -1171,8 +1328,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
    >  		r = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
    >  		break;
    >  	case SIOCGIFRXR:
    > -		r = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
    > -		    NULL, sc->sc_rx_mbuf_size, &sc->sc_q[0].viq_rxring);
    > +		r = vio_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
    >  		break;
    >  	default:
    >  		r = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
    > @@ -1666,6 +1822,8 @@ vio_ctrl_submit(struct vio_softc *sc, int slot)
    >  			vio_ctrl_wakeup(sc, RESET);
    >  			return ENXIO;
    >  		}
    > +		if (cold)
    > +			virtio_check_vq(sc->sc_virtio, sc->sc_ctl_vq);
    >  	}
    >  
    >  	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
    > @@ -1723,6 +1881,41 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
    >  	return r;
    >  }
    >  
    > +/* issue a VIRTIO_NET_CTRL_MQ class command and wait for completion */
    > +int
    > +vio_ctrl_mq(struct vio_softc *sc)
    > +{
    > +	struct virtio_softc *vsc = sc->sc_virtio;
    > +	struct virtqueue *vq = sc->sc_ctl_vq;
    > +	int r, slot;
    > +
    > +
    > +	r = vio_ctrl_start(sc, VIRTIO_NET_CTRL_MQ,
    > +	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, 1, &slot);
    > +	if (r != 0)
    > +		return r;
    > +
    > +	sc->sc_ctrl_mq_pairs->virtqueue_pairs = sc->sc_nqueues;
    > +
    > +	vio_dmamem_enqueue(vsc, sc, vq, slot, sc->sc_ctrl_mq_pairs,
    > +	    sizeof(*sc->sc_ctrl_mq_pairs), 1);
    > +
    > +	r = vio_ctrl_submit(sc, slot);
    > +
    > +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_mq_pairs,
    > +	    sizeof(*sc->sc_ctrl_mq_pairs), BUS_DMASYNC_POSTWRITE);
    > +
    > +	if (r != 0)
    > +		printf("%s: ctrl cmd %d failed\n", sc->sc_dev.dv_xname,
    > +		    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET);
    > +
    > +	DPRINTF("%s: cmd %d %d: %d\n", __func__,
    > +	    VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, sc->sc_nqueues, r);
    > +
    > +	vio_ctrl_finish(sc);
    > +	return r;
    > +}
    > +
    >  int
    >  vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
    >  {
    > diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
    > index 207e43ce9b8..a0727e76ee8 100644
    > --- a/sys/dev/pv/virtiovar.h
    > +++ b/sys/dev/pv/virtiovar.h
    > @@ -165,6 +165,8 @@ struct virtio_ops {
    >  	int		(*attach_finish)(struct virtio_softc *, struct virtio_attach_args *);
    >  	int		(*poll_intr)(void *);
    >  	void		(*intr_barrier)(struct virtio_softc *);
    > +	int		(*intr_establish)(struct virtio_softc *, struct virtio_attach_args *,
    > +			    int, struct cpu_info *, int (*)(void *), void *);
    >  };
    >  
    >  #define VIRTIO_CHILD_ERROR	((void*)1)
    > @@ -208,6 +210,14 @@ struct virtio_softc {
    >  #define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
    >  #define	virtio_intr_barrier(sc)			(sc)->sc_ops->intr_barrier(sc)
    >  
    > +/*
    > + * virtio_intr_establish() only works if va_nintr > 1. If it is called by a
    > + * child driver, the transport driver will skip automatic intr allocation and
    > + * the child driver must allocate all required interrupts itself. Vector 0 is
    > + * always used for the config change interrupt.
    > + */
    > +#define	virtio_intr_establish(sc, va, v, ci, fn, a)	(sc)->sc_ops->intr_establish(sc, va, v, ci, fn, a)
    > +
    >  /* only for transport drivers */
    >  #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)
    >  
    
    
    