Index | Thread | Search

From:
Yuichiro NAITO <naito.yuichiro@gmail.com>
Subject:
Re: iavf(4): multi-queue support
To:
j.klemkow@wemelug.de
Cc:
tech@openbsd.org
Date:
Fri, 22 Nov 2024 16:57:32 +0900

Download raw body.

Thread
From: Jan Klemkow <j.klemkow@wemelug.de>
Subject: Re: iavf(4): multi-queue support
Date: Thu, 21 Nov 2024 10:44:40 +0100

> On Thu, Nov 21, 2024 at 10:31:02AM GMT, Yuichiro NAITO wrote:
>> From: Yuichiro NAITO <naito.yuichiro@gmail.com>
>> Subject: Re: iavf(4): multi-queue support
>> Date: Wed, 04 Sep 2024 17:22:21 +0900 (JST)
>> 
>> > Hi. Suppose you are interested in iavf(4) multi-queue. Try the following
>> > complete patch which enables multi-queue, checksum offloads, and TSO.
>> > I confirmed it works on my ESXi 8.0 and Linux qemu/kvm. Iperf3 results in
>> > 9.41 Gbps transmit speed and 6.87 Gbps receive speed of my OpenBSD guest
>> > with MTU size 1500 on ESXi 8.0.
>> 
>> Hi, I had some reports that my patch doesn't work on ESXi while attaching
>> an iavf device. The reporter said the following error messages are shown
>> in the dmesg.
>> 
>> ```
>> iavf0: SET_RSS_HENA failed: -1
>> iavf0: queue op 9 failed: -1
>> ```
>> 
>> Both errors had an error code '-1', meaning the response from the PF driver
>> timed out. The `SET_RSS_HENA` request sends a packet classifier value for
>> the RSS hash filter which currently sends 0. Some PF driver version of ESXi
>> ignores the 0 value. So, I added the default value referring to the NetBSD
>> driver. The value definition is the same as the ixl(4). I split the
>> definitions to the 'if_iavfvars.h' file to share the code.
>> 
>> The `queue op 9 failed` message happened in the 'iavf_queue_select' function.
>> This seems really timed out. I extended the time-out value to 3000 ms. This
>> value is also taken from NetBSD.
>> 
>> I merged my code that handles a PCI bus error case in my previous mail.
>> 
>> https://marc.info/?l=openbsd-tech&m=172723210819245&w=2
>> 
>> I also merged Jan's code that has VLAN #ifdef. The checksum offload code is
>> the same as Jan's. If you see the diff from Jan's code, you will see my code
>> only.
>> 
>> https://marc.info/?l=openbsd-tech&m=173040636900369&w=2
>> 
>> OK?
> 
> I tested your diff on my KVM setup.  Works for me there.  I had no time
> for ESXi tests yet.
> 
> Could you split your diff in checksum offload, TSO and Multi-Queue.
> Thus, its easier to review and to see where the problems are.

Sure. I have split my patch into the following four patches.

1. Checksum offloading
2. TSO support
3. Multi-queue support
4. PCI bus error handling

Please apply them in this order.

Here is the checksum offloading patch, which you originally wrote.
I renamed the 'ixl_rx_checksum' function to 'iavf_rx_checksum', since the
old name looks like a simple mistake. No functional change is intended.

diff --git a/sys/dev/pci/if_iavf.c b/sys/dev/pci/if_iavf.c
index d573d6725f4..aac22b8f378 100644
--- a/sys/dev/pci/if_iavf.c
+++ b/sys/dev/pci/if_iavf.c
@@ -49,6 +49,7 @@
  */
 
 #include "bpfilter.h"
+#include "vlan.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -75,6 +76,7 @@
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
+#include <netinet/udp.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
@@ -890,11 +892,13 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
 	ifq_init_maxlen(&ifp->if_snd, sc->sc_tx_ring_ndescs);
 
-	ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
-#if 0
-	ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 |
-	    IFCAP_CSUM_UDPv4;
+	ifp->if_capabilities = IFCAP_VLAN_MTU;
+#if NVLAN > 0
+	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
 #endif
+	ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
+	    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
+	    IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
 
 	ifmedia_init(&sc->sc_media, 0, iavf_media_change, iavf_media_status);
 
@@ -1656,6 +1660,57 @@ iavf_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
 }
 
+static uint64_t
+iavf_tx_offload(struct mbuf *m)
+{
+	struct ether_extracted ext;
+	uint64_t hlen;
+	uint64_t offload = 0;
+
+#if NVLAN > 0
+	if (ISSET(m->m_flags, M_VLANTAG)) {
+		uint64_t vtag = m->m_pkthdr.ether_vtag;
+		offload |= IAVF_TX_DESC_CMD_IL2TAG1;
+		offload |= vtag << IAVF_TX_DESC_L2TAG1_SHIFT;
+	}
+#endif
+
+	if (!ISSET(m->m_pkthdr.csum_flags,
+	    M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+		return (offload);
+
+	ether_extract_headers(m, &ext);
+
+	if (ext.ip4) {
+		offload |= ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_OUT) ?
+		    IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM :
+		    IAVF_TX_DESC_CMD_IIPT_IPV4;
+#ifdef INET6
+	} else if (ext.ip6) {
+		offload |= IAVF_TX_DESC_CMD_IIPT_IPV6;
+#endif
+	} else {
+		panic("CSUM_OUT set for non-IP packet");
+		/* NOTREACHED */
+	}
+	hlen = ext.iphlen;
+
+	offload |= (ETHER_HDR_LEN >> 1) << IAVF_TX_DESC_MACLEN_SHIFT;
+	offload |= (hlen >> 2) << IAVF_TX_DESC_IPLEN_SHIFT;
+
+	if (ext.tcp && ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) {
+		offload |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
+		offload |= (uint64_t)(ext.tcphlen >> 2)
+		    << IAVF_TX_DESC_L4LEN_SHIFT;
+	} else if (ext.udp && ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) {
+		offload |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
+		offload |= (uint64_t)(sizeof(*ext.udp) >> 2)
+		    << IAVF_TX_DESC_L4LEN_SHIFT;
+	}
+
+	return offload;
+}
+
 static void
 iavf_start(struct ifqueue *ifq)
 {
@@ -1667,7 +1722,7 @@ iavf_start(struct ifqueue *ifq)
 	bus_dmamap_t map;
 	struct mbuf *m;
 	uint64_t cmd;
-	uint64_t vlan_cmd;
+	uint64_t offload;
 	unsigned int prod, free, last, i;
 	unsigned int mask;
 	int post = 0;
@@ -1702,6 +1757,8 @@ iavf_start(struct ifqueue *ifq)
 		if (m == NULL)
 			break;
 
+		offload = iavf_tx_offload(m);
+
 		txm = &txr->txr_maps[prod];
 		map = txm->txm_map;
 
@@ -1714,20 +1771,13 @@ iavf_start(struct ifqueue *ifq)
 		bus_dmamap_sync(sc->sc_dmat, map, 0,
 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
 
-		vlan_cmd = 0;
-		if (m->m_flags & M_VLANTAG) {
-			vlan_cmd = IAVF_TX_DESC_CMD_IL2TAG1 |
-			    (((uint64_t)m->m_pkthdr.ether_vtag) <<
-			    IAVF_TX_DESC_L2TAG1_SHIFT);
-		}
-
 		for (i = 0; i < map->dm_nsegs; i++) {
 			txd = &ring[prod];
 
 			cmd = (uint64_t)map->dm_segs[i].ds_len <<
 			    IAVF_TX_DESC_BSIZE_SHIFT;
-			cmd |= IAVF_TX_DESC_DTYPE_DATA | IAVF_TX_DESC_CMD_ICRC |
-			    vlan_cmd;
+			cmd |= IAVF_TX_DESC_DTYPE_DATA | IAVF_TX_DESC_CMD_ICRC;
+			cmd |= offload;
 
 			htolem64(&txd->addr, map->dm_segs[i].ds_addr);
 			htolem64(&txd->cmd, cmd);
@@ -1938,6 +1988,24 @@ iavf_rxr_free(struct iavf_softc *sc, struct iavf_rx_ring *rxr)
 	free(rxr, M_DEVBUF, sizeof(*rxr));
 }
 
+static void
+iavf_rx_checksum(struct mbuf *m, uint64_t word)
+{
+	if (!ISSET(word, IAVF_RX_DESC_L3L4P))
+		return;
+
+	if (ISSET(word, IAVF_RX_DESC_IPE))
+		return;
+
+	m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
+
+	if (ISSET(word, IAVF_RX_DESC_L4E))
+		return;
+
+	m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
+}
+
+
 static int
 iavf_rxeof(struct iavf_softc *sc, struct ifiqueue *ifiq)
 {
@@ -2002,6 +2070,7 @@ iavf_rxeof(struct iavf_softc *sc, struct ifiqueue *ifiq)
 		m->m_pkthdr.len += len;
 
 		if (ISSET(word, IAVF_RX_DESC_EOP)) {
+#if NVLAN > 0
 			if (ISSET(word, IAVF_RX_DESC_L2TAG1P)) {
 				vlan = (lemtoh64(&rxd->qword0) &
 				    IAVF_RX_DESC_L2TAG1_MASK)
@@ -2009,8 +2078,10 @@ iavf_rxeof(struct iavf_softc *sc, struct ifiqueue *ifiq)
 				m->m_pkthdr.ether_vtag = vlan;
 				m->m_flags |= M_VLANTAG;
 			}
+#endif
 			if (!ISSET(word,
 			    IAVF_RX_DESC_RXE | IAVF_RX_DESC_OVERSIZE)) {
+				iavf_rx_checksum(m, word);
 				ml_enqueue(&ml, m);
 			} else {
 				ifp->if_ierrors++; /* XXX */

Here is the TSO support patch. It is almost the same as the TSO code in ixl(4).

diff --git a/sys/dev/pci/if_iavf.c b/sys/dev/pci/if_iavf.c
index aac22b8f378..5f39b36dc20 100644
--- a/sys/dev/pci/if_iavf.c
+++ b/sys/dev/pci/if_iavf.c
@@ -82,6 +82,8 @@
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcidevs.h>
 
+#define IAVF_MAX_DMA_SEG_SIZE		((16 * 1024) - 1)
+
 #define I40E_MASK(mask, shift)		((mask) << (shift))
 #define I40E_AQ_LARGE_BUF		512
 
@@ -388,6 +390,10 @@ struct iavf_tx_desc {
 #define IAVF_TX_DESC_BSIZE_MASK		\
 	(IAVF_TX_DESC_BSIZE_MAX << IAVF_TX_DESC_BSIZE_SHIFT)
 
+#define IAVF_TX_CTX_DESC_CMD_TSO	0x10
+#define IAVF_TX_CTX_DESC_TLEN_SHIFT	30
+#define IAVF_TX_CTX_DESC_MSS_SHIFT	50
+
 #define IAVF_TX_DESC_L2TAG1_SHIFT	48
 #define IAVF_TX_DESC_L2TAG1_MASK	(0xffff << IAVF_TX_DESC_L2TAG1_SHIFT)
 } __packed __aligned(16);
@@ -899,6 +905,7 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 	ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
 	    IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
 	    IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 
 	ifmedia_init(&sc->sc_media, 0, iavf_media_change, iavf_media_status);
 
@@ -1565,7 +1572,7 @@ iavf_txr_alloc(struct iavf_softc *sc, unsigned int qid)
 		txm = &maps[i];
 
 		if (bus_dmamap_create(sc->sc_dmat,
-		    IAVF_HARDMTU, IAVF_TX_PKT_DESCS, IAVF_HARDMTU, 0,
+		    MAXMCLBYTES, IAVF_TX_PKT_DESCS, IAVF_MAX_DMA_SEG_SIZE, 0,
 		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
 		    &txm->txm_map) != 0)
 			goto uncreate;
@@ -1661,7 +1668,7 @@ iavf_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
 }
 
 static uint64_t
-iavf_tx_offload(struct mbuf *m)
+iavf_tx_offload(struct mbuf *m, struct iavf_tx_ring *txr, unsigned int prod)
 {
 	struct ether_extracted ext;
 	uint64_t hlen;
@@ -1676,7 +1683,7 @@ iavf_tx_offload(struct mbuf *m)
 #endif
 
 	if (!ISSET(m->m_pkthdr.csum_flags,
-	    M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+	    M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO))
 		return (offload);
 
 	ether_extract_headers(m, &ext);
@@ -1708,6 +1715,32 @@ iavf_tx_offload(struct mbuf *m)
 		    << IAVF_TX_DESC_L4LEN_SHIFT;
 	}
 
+	if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		if (ext.tcp && m->m_pkthdr.ph_mss > 0) {
+			struct iavf_tx_desc *ring, *txd;
+			uint64_t cmd = 0, paylen, outlen;
+
+			hlen += ext.tcphlen;
+
+			/*
+			 * The MSS should not be set to a lower value than 64
+			 * or larger than 9668 bytes.
+			 */
+			outlen = MIN(9668, MAX(64, m->m_pkthdr.ph_mss));
+			paylen = m->m_pkthdr.len - ETHER_HDR_LEN - hlen;
+			ring = IAVF_DMA_KVA(&txr->txr_mem);
+			txd = &ring[prod];
+
+			cmd |= IAVF_TX_DESC_DTYPE_CONTEXT;
+			cmd |= IAVF_TX_CTX_DESC_CMD_TSO;
+			cmd |= paylen << IAVF_TX_CTX_DESC_TLEN_SHIFT;
+			cmd |= outlen << IAVF_TX_CTX_DESC_MSS_SHIFT;
+
+			htolem64(&txd->addr, 0);
+			htolem64(&txd->cmd, cmd);
+		}
+	}
+
 	return offload;
 }
 
@@ -1748,7 +1781,8 @@ iavf_start(struct ifqueue *ifq)
 	mask = sc->sc_tx_ring_ndescs - 1;
 
 	for (;;) {
-		if (free <= IAVF_TX_PKT_DESCS) {
+		/* We need one extra descriptor for TSO packets. */
+		if (free <= (IAVF_TX_PKT_DESCS + 1)) {
 			ifq_set_oactive(ifq);
 			break;
 		}
@@ -1757,11 +1791,17 @@ iavf_start(struct ifqueue *ifq)
 		if (m == NULL)
 			break;
 
-		offload = iavf_tx_offload(m);
+		offload = iavf_tx_offload(m, txr, prod);
 
 		txm = &txr->txr_maps[prod];
 		map = txm->txm_map;
 
+		if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+			prod++;
+			prod &= mask;
+			free--;
+		}
+
 		if (iavf_load_mbuf(sc->sc_dmat, map, m) != 0) {
 			ifq->ifq_errors++;
 			m_freem(m);

Here is the multi-queue support patch. With this patch applied, iavf(4) works
on ESXi. It includes the fixes for the 'SET_RSS_HENA' failure and the PF
response timeout problem.

diff --git a/sys/dev/pci/if_iavf.c b/sys/dev/pci/if_iavf.c
index 5f39b36dc20..1226b953821 100644
--- a/sys/dev/pci/if_iavf.c
+++ b/sys/dev/pci/if_iavf.c
@@ -63,6 +63,7 @@
 #include <sys/timeout.h>
 #include <sys/task.h>
 #include <sys/syslog.h>
+#include <sys/intrmap.h>
 
 #include <machine/bus.h>
 #include <machine/intr.h>
@@ -82,6 +83,11 @@
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcidevs.h>
 
+#ifndef CACHE_LINE_SIZE
+#define CACHE_LINE_SIZE 64
+#endif
+
+#define IAVF_MAX_VECTORS		4
 #define IAVF_MAX_DMA_SEG_SIZE		((16 * 1024) - 1)
 
 #define I40E_MASK(mask, shift)		((mask) << (shift))
@@ -93,7 +99,10 @@
 #define IAVF_VFR_COMPLETED		1
 #define IAVF_VFR_VFACTIVE		2
 
+#define IAVF_EXEC_TIMEOUT               3000
+
 #include <dev/pci/if_ixlreg.h>
+#include <dev/pci/if_ixlvar.h>
 
 struct iavf_aq_desc {
 	uint16_t	iaq_flags;
@@ -534,6 +543,7 @@ struct iavf_tx_map {
 struct iavf_tx_ring {
 	unsigned int		 txr_prod;
 	unsigned int		 txr_cons;
+	struct ifqueue		*txr_ifq;
 
 	struct iavf_tx_map	*txr_maps;
 	struct iavf_dmamem	 txr_mem;
@@ -549,6 +559,7 @@ struct iavf_rx_map {
 
 struct iavf_rx_ring {
 	struct iavf_softc	*rxr_sc;
+	struct ifiqueue		*rxr_ifiq;
 
 	struct if_rxring	 rxr_acct;
 	struct timeout		 rxr_refill;
@@ -566,17 +577,36 @@ struct iavf_rx_ring {
 	unsigned int		 rxr_qid;
 };
 
+struct iavf_vector {
+	struct iavf_softc	*iv_sc;
+	struct iavf_rx_ring	*iv_rxr;
+	struct iavf_tx_ring	*iv_txr;
+	int			 iv_qid;
+	void			*iv_ihc;
+	char			 iv_name[16];
+} __aligned(CACHE_LINE_SIZE);
+
+enum i40e_mac_type {
+        I40E_MAC_XL710,
+        I40E_MAC_X722,
+        I40E_MAC_X722_VF,
+        I40E_MAC_VF,
+        I40E_MAC_GENERIC
+};
+
 struct iavf_softc {
 	struct device		 sc_dev;
 	struct arpcom		 sc_ac;
 	struct ifmedia		 sc_media;
 	uint64_t		 sc_media_status;
 	uint64_t		 sc_media_active;
+	enum i40e_mac_type       sc_mac_type;
 
 	pci_chipset_tag_t	 sc_pc;
 	pci_intr_handle_t	 sc_ih;
 	void			*sc_ihc;
 	pcitag_t		 sc_tag;
+	struct intrmap		*sc_intrmap;
 
 	bus_dma_tag_t		 sc_dmat;
 	bus_space_tag_t		 sc_memt;
@@ -620,6 +650,9 @@ struct iavf_softc {
 	unsigned int		 sc_tx_ring_ndescs;
 	unsigned int		 sc_rx_ring_ndescs;
 	unsigned int		 sc_nqueues;	/* 1 << sc_nqueues */
+	unsigned int             sc_nintrs;
+
+	struct iavf_vector	*sc_vectors;
 
 	struct rwlock		 sc_cfg_lock;
 	unsigned int		 sc_dead;
@@ -644,6 +677,7 @@ static void	iavf_atq_done(struct iavf_softc *);
 
 static void	iavf_init_admin_queue(struct iavf_softc *);
 
+static enum i40e_mac_type iavf_mactype(pci_product_id_t);
 static int	iavf_get_version(struct iavf_softc *);
 static int	iavf_get_vf_resources(struct iavf_softc *);
 static int	iavf_config_irq_map(struct iavf_softc *);
@@ -652,6 +686,7 @@ static int	iavf_add_del_addr(struct iavf_softc *, uint8_t *, int);
 static int	iavf_process_arq(struct iavf_softc *, int);
 
 static int	iavf_match(struct device *, void *, void *);
+static int	iavf_setup_interrupts(struct iavf_softc *, struct pci_attach_args *);
 static void	iavf_attach(struct device *, struct device *, void *);
 
 static int	iavf_media_change(struct ifnet *);
@@ -660,6 +695,7 @@ static void	iavf_watchdog(struct ifnet *);
 static int	iavf_ioctl(struct ifnet *, u_long, caddr_t);
 static void	iavf_start(struct ifqueue *);
 static int	iavf_intr(void *);
+static int	iavf_intr_vector(void *);
 static int	iavf_up(struct iavf_softc *);
 static int	iavf_down(struct iavf_softc *);
 static int	iavf_iff(struct iavf_softc *);
@@ -723,9 +759,17 @@ static const struct iavf_aq_regs iavf_aq_regs = {
 	    I40E_VFINT_DYN_CTL0_CLEARPBA_MASK | \
 	    (IAVF_NOITR << I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT)); \
 	iavf_wr((_s), I40E_VFINT_ICR0_ENA1, I40E_VFINT_ICR0_ENA1_ADMINQ_MASK)
+#define iavf_queue_intr_enable(_s, _q)					\
+        iavf_wr((_s), I40E_VFINT_DYN_CTLN1((_q)),			\
+		I40E_VFINT_DYN_CTLN1_INTENA_MASK |			\
+		I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |			\
+		(IAVF_NOITR << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT))
+#define iavf_queue_intr_disable(_s, _q)					\
+        iavf_wr((_s), I40E_VFINT_DYN_CTLN1((_q)),			\
+		(IAVF_NOITR << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT))
 
 #define iavf_nqueues(_sc)	(1 << (_sc)->sc_nqueues)
-#define iavf_allqueues(_sc)	((1 << ((_sc)->sc_nqueues+1)) - 1)
+#define iavf_allqueues(_sc)	((1 << (iavf_nqueues(_sc))) - 1)
 
 #ifdef __LP64__
 #define iavf_dmamem_hi(_ixm)	(uint32_t)(IAVF_DMA_DVA(_ixm) >> 32)
@@ -765,6 +809,107 @@ iavf_match(struct device *parent, void *match, void *aux)
 	return (pci_matchbyid(aux, iavf_devices, nitems(iavf_devices)));
 }
 
+static enum i40e_mac_type
+iavf_mactype(pci_product_id_t id)
+{
+
+        switch (id) {
+        case PCI_PRODUCT_INTEL_XL710_VF:
+        case PCI_PRODUCT_INTEL_XL710_VF_HV:
+                return I40E_MAC_VF;
+        case PCI_PRODUCT_INTEL_X722_VF:
+                return I40E_MAC_X722_VF;
+        }
+
+        return I40E_MAC_GENERIC;
+}
+
+static int
+iavf_intr_vector(void *v)
+{
+	struct iavf_vector *iv = v;
+	struct iavf_softc *sc = iv->iv_sc;
+
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	int rv = 0;
+
+	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+		rv |= iavf_rxeof(sc, iv->iv_rxr->rxr_ifiq);
+		rv |= iavf_txeof(sc, iv->iv_txr->txr_ifq);
+	}
+
+	iavf_queue_intr_enable(sc, iv->iv_qid);
+
+	return rv;
+}
+
+static int
+iavf_setup_interrupts(struct iavf_softc *sc, struct pci_attach_args *pa)
+{
+	unsigned int i, v, nqueues = iavf_nqueues(sc);
+	struct iavf_vector *iv;
+	pci_intr_handle_t ih;
+
+	sc->sc_ihc = pci_intr_establish(sc->sc_pc, sc->sc_ih,
+	    IPL_NET | IPL_MPSAFE, iavf_intr, sc, DEVNAME(sc));
+	if (sc->sc_ihc == NULL) {
+		printf("%s: unable to establish interrupt handler\n",
+		    DEVNAME(sc));
+		return -1;
+	}
+
+	sc->sc_vectors = mallocarray(sizeof(*sc->sc_vectors), nqueues,
+	    M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+	if (sc->sc_vectors == NULL) {
+		printf("%s: unable to allocate vectors\n", DEVNAME(sc));
+		return -1;
+	}
+
+	for (i = 0; i < nqueues; i++) {
+		iv = &sc->sc_vectors[i];
+		iv->iv_sc = sc;
+		iv->iv_qid = i;
+		snprintf(iv->iv_name, sizeof(iv->iv_name), "%s:%u",
+			 DEVNAME(sc), i);
+	}
+
+	if (sc->sc_intrmap) {
+		for (i = 0; i < nqueues; i++) {
+			iv = &sc->sc_vectors[i];
+			v = i + 1; /* 0 is used for adminq */
+
+			if (pci_intr_map_msix(pa, v, &ih)) {
+				printf("%s: unable to map msi-x vector %d\n",
+				    DEVNAME(sc), v);
+				goto free_vectors;
+			}
+
+			iv->iv_ihc = pci_intr_establish_cpu(sc->sc_pc, ih,
+			    IPL_NET | IPL_MPSAFE,
+			    intrmap_cpu(sc->sc_intrmap, i),
+			    iavf_intr_vector, iv, iv->iv_name);
+			if (iv->iv_ihc == NULL) {
+				printf("%s: unable to establish interrupt %d\n",
+				    DEVNAME(sc), v);
+				goto free_vectors;
+			}
+		}
+	}
+
+	sc->sc_nintrs = nqueues + 1;
+	return 0;
+free_vectors:
+	if (sc->sc_intrmap != NULL) {
+		for (i = 0; i < nqueues; i++) {
+			struct iavf_vector *iv = &sc->sc_vectors[i];
+			if (iv->iv_ihc != NULL)
+				pci_intr_disestablish(sc->sc_pc, iv->iv_ihc);
+		}
+	}
+	free(sc->sc_vectors, M_DEVBUF, nqueues * sizeof(*sc->sc_vectors));
+	return -1;
+}
+
 void
 iavf_attach(struct device *parent, struct device *self, void *aux)
 {
@@ -772,7 +917,8 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 	struct ifnet *ifp = &sc->sc_ac.ac_if;
 	struct pci_attach_args *pa = aux;
 	pcireg_t memtype;
-	int tries;
+	int nmsix, tries;
+	unsigned int nqueues;
 
 	rw_init(&sc->sc_cfg_lock, "iavfcfg");
 
@@ -781,6 +927,8 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 	sc->sc_dmat = pa->pa_dmat;
 	sc->sc_aq_regs = &iavf_aq_regs;
 
+	sc->sc_mac_type = iavf_mactype(PCI_PRODUCT(pa->pa_id));
+
 	sc->sc_nqueues = 0; /* 1 << 0 is 1 queue */
 	sc->sc_tx_ring_ndescs = 1024;
 	sc->sc_rx_ring_ndescs = 1024;
@@ -860,13 +1008,20 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 		goto free_scratch;
 	}
 
-	if (iavf_config_irq_map(sc) != 0) {
-		printf(", timeout waiting for IRQ map response");
-		goto free_scratch;
-	}
-
 	/* msix only? */
-	if (pci_intr_map_msix(pa, 0, &sc->sc_ih) != 0) {
+	if (pci_intr_map_msix(pa, 0, &sc->sc_ih) == 0) {
+		nmsix = pci_intr_msix_count(pa);
+		if (nmsix > 1) { /* we used 1 (the 0th) for the adminq */
+			nmsix--;
+
+			sc->sc_intrmap = intrmap_create(&sc->sc_dev,
+			    nmsix, IAVF_MAX_VECTORS, INTRMAP_POWEROF2);
+			nqueues = intrmap_count(sc->sc_intrmap);
+			KASSERT(nqueues > 0);
+			KASSERT(powerof2(nqueues));
+			sc->sc_nqueues = fls(nqueues) - 1;
+		}
+	} else {
 		printf(", unable to map interrupt\n");
 		goto free_scratch;
 	}
@@ -876,17 +1031,23 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 	if (memcmp(sc->sc_ac.ac_enaddr, etheranyaddr, ETHER_ADDR_LEN) == 0)
 		ether_fakeaddr(ifp);
 
-	printf(", %s, address %s\n", pci_intr_string(sc->sc_pc, sc->sc_ih),
-	    ether_sprintf(sc->sc_ac.ac_enaddr));
+	nqueues = iavf_nqueues(sc);
+	printf(", %s, %d queue%s, address %s\n",
+	       pci_intr_string(sc->sc_pc, sc->sc_ih),
+	       nqueues, (nqueues > 1 ? "s" : ""),
+	       ether_sprintf(sc->sc_ac.ac_enaddr));
 
-	sc->sc_ihc = pci_intr_establish(sc->sc_pc, sc->sc_ih,
-	    IPL_NET | IPL_MPSAFE, iavf_intr, sc, DEVNAME(sc));
-	if (sc->sc_ihc == NULL) {
+	if (iavf_setup_interrupts(sc, pa) != 0) {
 		printf("%s: unable to establish interrupt handler\n",
 		    DEVNAME(sc));
 		goto free_scratch;
 	}
 
+	if (iavf_config_irq_map(sc) != 0) {
+		printf(", timeout waiting for IRQ map response");
+		goto free_scratch;
+	}
+
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_xflags = IFXF_MPSAFE;
@@ -1106,7 +1267,7 @@ iavf_config_vsi_queues(struct iavf_softc *sc)
 	    BUS_DMASYNC_PREREAD);
 
 	iavf_atq_post(sc, &iaq);
-	rv = iavf_arq_wait(sc, 250);
+	rv = iavf_arq_wait(sc, IAVF_EXEC_TIMEOUT);
 	if (rv != IAVF_VC_RC_SUCCESS) {
 		printf("%s: CONFIG_VSI_QUEUES failed: %d\n", DEVNAME(sc), rv);
 		return (1);
@@ -1130,10 +1291,11 @@ iavf_config_hena(struct iavf_softc *sc)
 	iavf_aq_dva(&iaq, IAVF_DMA_DVA(&sc->sc_scratch));
 
 	caps = IAVF_DMA_KVA(&sc->sc_scratch);
-	*caps = 0;
+	*caps = (sc->sc_mac_type == I40E_MAC_X722_VF) ? IXL_RSS_HENA_BASE_722 :
+		IXL_RSS_HENA_BASE_710;
 
 	iavf_atq_post(sc, &iaq);
-	rv = iavf_arq_wait(sc, 250);
+	rv = iavf_arq_wait(sc, IAVF_EXEC_TIMEOUT);
 	if (rv != IAVF_VC_RC_SUCCESS) {
 		printf("%s: SET_RSS_HENA failed: %d\n", DEVNAME(sc), rv);
 		return (1);
@@ -1168,7 +1330,7 @@ iavf_queue_select(struct iavf_softc *sc, int opcode)
 	    BUS_DMASYNC_PREREAD);
 
 	iavf_atq_post(sc, &iaq);
-	rv = iavf_arq_wait(sc, 250);
+	rv = iavf_arq_wait(sc, IAVF_EXEC_TIMEOUT);
 	if (rv != IAVF_VC_RC_SUCCESS) {
 		printf("%s: queue op %d failed: %d\n", DEVNAME(sc), opcode, rv);
 		return (1);
@@ -1181,13 +1343,13 @@ static int
 iavf_up(struct iavf_softc *sc)
 {
 	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct iavf_vector  *iv;
 	struct iavf_rx_ring *rxr;
 	struct iavf_tx_ring *txr;
 	unsigned int nqueues, i;
 	int rv = ENOMEM;
 
 	nqueues = iavf_nqueues(sc);
-	KASSERT(nqueues == 1); /* XXX */
 
 	rw_enter_write(&sc->sc_cfg_lock);
 	if (sc->sc_dead) {
@@ -1206,8 +1368,11 @@ iavf_up(struct iavf_softc *sc)
 			goto free;
 		}
 
-		ifp->if_iqs[i]->ifiq_softc = rxr;
-		ifp->if_ifqs[i]->ifq_softc = txr;
+		iv = &sc->sc_vectors[i];
+		iv->iv_rxr = ifp->if_iqs[i]->ifiq_softc = rxr;
+		iv->iv_txr = ifp->if_ifqs[i]->ifq_softc = txr;
+		rxr->rxr_ifiq = ifp->if_iqs[i];
+		txr->txr_ifq = ifp->if_ifqs[i];
 
 		iavf_rxfill(sc, rxr);
 	}
@@ -1221,6 +1386,9 @@ iavf_up(struct iavf_softc *sc)
 	if (iavf_queue_select(sc, IAVF_VC_OP_ENABLE_QUEUES) != 0)
 		goto down;
 
+	for (i = 0; i < nqueues; i++)
+		iavf_queue_intr_enable(sc, i);
+
 	SET(ifp->if_flags, IFF_RUNNING);
 
 	iavf_wr(sc, I40E_VFINT_ITR01(0), 0x7a);
@@ -1246,6 +1414,9 @@ free:
 
 		iavf_txr_free(sc, txr);
 		iavf_rxr_free(sc, rxr);
+		iv = &sc->sc_vectors[i];
+		iv->iv_rxr = ifp->if_iqs[i]->ifiq_softc = NULL;
+		iv->iv_txr = ifp->if_ifqs[i]->ifq_softc = NULL;
 	}
 	rw_exit_write(&sc->sc_cfg_lock);
 	return (rv);
@@ -1284,7 +1455,7 @@ iavf_config_promisc_mode(struct iavf_softc *sc, int unicast, int multicast)
 	    BUS_DMASYNC_PREREAD);
 
 	iavf_atq_post(sc, &iaq);
-	rv = iavf_arq_wait(sc, 250);
+	rv = iavf_arq_wait(sc, IAVF_EXEC_TIMEOUT);
 	if (rv != IAVF_VC_RC_SUCCESS) {
 		printf("%s: CONFIG_PROMISC_MODE failed: %d\n", DEVNAME(sc), rv);
 		return (1);
@@ -1323,7 +1494,7 @@ iavf_add_del_addr(struct iavf_softc *sc, uint8_t *addr, int add)
 	    BUS_DMASYNC_PREREAD);
 
 	iavf_atq_post(sc, &iaq);
-	rv = iavf_arq_wait(sc, 250);
+	rv = iavf_arq_wait(sc, IAVF_EXEC_TIMEOUT);
 	if (rv != IAVF_VC_RC_SUCCESS) {
 		printf("%s: ADD/DEL_ETH_ADDR failed: %d\n", DEVNAME(sc), rv);
 		return (1);
@@ -1368,6 +1539,7 @@ static int
 iavf_down(struct iavf_softc *sc)
 {
 	struct ifnet *ifp = &sc->sc_ac.ac_if;
+	struct iavf_vector  *iv;
 	struct iavf_rx_ring *rxr;
 	struct iavf_tx_ring *txr;
 	unsigned int nqueues, i;
@@ -1397,6 +1569,8 @@ iavf_down(struct iavf_softc *sc)
 	/* make sure no hw generated work is still in flight */
 	intr_barrier(sc->sc_ihc);
 	for (i = 0; i < nqueues; i++) {
+		iavf_queue_intr_disable(sc, i);
+
 		rxr = ifp->if_iqs[i]->ifiq_softc;
 		txr = ifp->if_ifqs[i]->ifq_softc;
 
@@ -1406,8 +1580,9 @@ iavf_down(struct iavf_softc *sc)
 	}
 
 	for (i = 0; i < nqueues; i++) {
-		rxr = ifp->if_iqs[i]->ifiq_softc;
-		txr = ifp->if_ifqs[i]->ifq_softc;
+		iv = &sc->sc_vectors[i];
+		txr = iv->iv_txr;
+		rxr = iv->iv_rxr;
 
 		iavf_txr_clean(sc, txr);
 		iavf_rxr_clean(sc, rxr);
@@ -1415,8 +1590,8 @@ iavf_down(struct iavf_softc *sc)
 		iavf_txr_free(sc, txr);
 		iavf_rxr_free(sc, rxr);
 
-		ifp->if_iqs[i]->ifiq_softc = NULL;
-		ifp->if_ifqs[i]->ifq_softc =  NULL;
+		iv->iv_rxr = ifp->if_iqs[i]->ifiq_softc = NULL;
+		iv->iv_txr = ifp->if_ifqs[i]->ifq_softc = NULL;
 	}
 
 	/* unmask */
@@ -2666,25 +2841,45 @@ iavf_config_irq_map(struct iavf_softc *sc)
 	struct iavf_aq_desc iaq;
 	struct iavf_vc_vector_map *vec;
 	struct iavf_vc_irq_map_info *map;
+	struct iavf_vector *iv;
+	unsigned int num_vec = 0;
 	int tries;
 
 	memset(&iaq, 0, sizeof(iaq));
 	iaq.iaq_flags = htole16(IAVF_AQ_BUF | IAVF_AQ_RD);
 	iaq.iaq_opcode = htole16(IAVF_AQ_OP_SEND_TO_PF);
 	iaq.iaq_vc_opcode = htole32(IAVF_VC_OP_CONFIG_IRQ_MAP);
-	iaq.iaq_datalen = htole16(sizeof(*map) + sizeof(*vec));
+	iaq.iaq_datalen = htole16(sizeof(*map) + sizeof(*vec) * sc->sc_nintrs);
 	iavf_aq_dva(&iaq, IAVF_DMA_DVA(&sc->sc_scratch));
 
 	map = IAVF_DMA_KVA(&sc->sc_scratch);
-	map->num_vectors = htole16(1);
 
 	vec = map->vecmap;
-	vec[0].vsi_id = htole16(sc->sc_vsi_id);
-	vec[0].vector_id = 0;
-	vec[0].rxq_map = htole16(iavf_allqueues(sc));
-	vec[0].txq_map = htole16(iavf_allqueues(sc));
-	vec[0].rxitr_idx = htole16(IAVF_NOITR);
-	vec[0].txitr_idx = htole16(IAVF_NOITR);
+	if (sc->sc_nintrs == 1) {
+		vec[num_vec].vsi_id = htole16(sc->sc_vsi_id);
+		vec[num_vec].vector_id = htole16(num_vec);
+		vec[num_vec].rxq_map = htole16(iavf_allqueues(sc));
+		vec[num_vec].txq_map = htole16(iavf_allqueues(sc));
+		vec[num_vec].rxitr_idx = htole16(IAVF_NOITR);
+		vec[num_vec].txitr_idx = htole16(IAVF_NOITR);
+		num_vec++;
+	} else if (sc->sc_nintrs > 1) {
+		for (; num_vec < sc->sc_nintrs - 1; num_vec++) {
+			iv = &sc->sc_vectors[num_vec];
+			vec[num_vec].vsi_id = htole16(sc->sc_vsi_id);
+			vec[num_vec].vector_id = htole16(num_vec + 1);
+			vec[num_vec].rxq_map = htole16(1 << iv->iv_qid);
+			vec[num_vec].txq_map = htole16(1 << iv->iv_qid);
+			vec[num_vec].rxitr_idx = htole16(IAVF_ITR0);
+			vec[num_vec].txitr_idx = htole16(IAVF_ITR1);
+		}
+		vec[num_vec].vsi_id = htole16(sc->sc_vsi_id);
+		vec[num_vec].vector_id = htole16(0);
+		vec[num_vec].rxq_map = htole16(0);
+		vec[num_vec].txq_map = htole16(0);
+		num_vec++;
+	}
+	map->num_vectors = htole16(num_vec);
 
 	bus_dmamap_sync(sc->sc_dmat, IAVF_DMA_MAP(&sc->sc_scratch), 0, IAVF_DMA_LEN(&sc->sc_scratch),
 	    BUS_DMASYNC_PREREAD);
diff --git a/sys/dev/pci/if_ixl.c b/sys/dev/pci/if_ixl.c
index 12c84ba2c79..66bbf2415ed 100644
--- a/sys/dev/pci/if_ixl.c
+++ b/sys/dev/pci/if_ixl.c
@@ -923,53 +923,7 @@ CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE);
 #define IXL_AQ_ALIGN			64 /* lol */
 #define IXL_AQ_BUFLEN			4096
 
-/* Packet Classifier Types for filters */
-/* bits 0-28 are reserved for future use */
-#define IXL_PCT_NONF_IPV4_UDP_UCAST	(1ULL << 29)	/* 722 */
-#define IXL_PCT_NONF_IPV4_UDP_MCAST	(1ULL << 30)	/* 722 */
-#define IXL_PCT_NONF_IPV4_UDP		(1ULL << 31)
-#define IXL_PCT_NONF_IPV4_TCP_SYN_NOACK	(1ULL << 32)	/* 722 */
-#define IXL_PCT_NONF_IPV4_TCP		(1ULL << 33)
-#define IXL_PCT_NONF_IPV4_SCTP		(1ULL << 34)
-#define IXL_PCT_NONF_IPV4_OTHER		(1ULL << 35)
-#define IXL_PCT_FRAG_IPV4		(1ULL << 36)
-/* bits 37-38 are reserved for future use */
-#define IXL_PCT_NONF_IPV6_UDP_UCAST	(1ULL << 39)	/* 722 */
-#define IXL_PCT_NONF_IPV6_UDP_MCAST	(1ULL << 40)	/* 722 */
-#define IXL_PCT_NONF_IPV6_UDP		(1ULL << 41)
-#define IXL_PCT_NONF_IPV6_TCP_SYN_NOACK	(1ULL << 42)	/* 722 */
-#define IXL_PCT_NONF_IPV6_TCP		(1ULL << 43)
-#define IXL_PCT_NONF_IPV6_SCTP		(1ULL << 44)
-#define IXL_PCT_NONF_IPV6_OTHER		(1ULL << 45)
-#define IXL_PCT_FRAG_IPV6		(1ULL << 46)
-/* bit 47 is reserved for future use */
-#define IXL_PCT_FCOE_OX			(1ULL << 48)
-#define IXL_PCT_FCOE_RX			(1ULL << 49)
-#define IXL_PCT_FCOE_OTHER		(1ULL << 50)
-/* bits 51-62 are reserved for future use */
-#define IXL_PCT_L2_PAYLOAD		(1ULL << 63)
-
-#define IXL_RSS_HENA_BASE_DEFAULT		\
-	IXL_PCT_NONF_IPV4_UDP |			\
-	IXL_PCT_NONF_IPV4_TCP |			\
-	IXL_PCT_NONF_IPV4_SCTP |		\
-	IXL_PCT_NONF_IPV4_OTHER |		\
-	IXL_PCT_FRAG_IPV4 |			\
-	IXL_PCT_NONF_IPV6_UDP |			\
-	IXL_PCT_NONF_IPV6_TCP |			\
-	IXL_PCT_NONF_IPV6_SCTP |		\
-	IXL_PCT_NONF_IPV6_OTHER |		\
-	IXL_PCT_FRAG_IPV6 |			\
-	IXL_PCT_L2_PAYLOAD
-
-#define IXL_RSS_HENA_BASE_710		IXL_RSS_HENA_BASE_DEFAULT
-#define IXL_RSS_HENA_BASE_722		IXL_RSS_HENA_BASE_DEFAULT | \
-	IXL_PCT_NONF_IPV4_UDP_UCAST |		\
-	IXL_PCT_NONF_IPV4_UDP_MCAST |		\
-	IXL_PCT_NONF_IPV6_UDP_UCAST |		\
-	IXL_PCT_NONF_IPV6_UDP_MCAST |		\
-	IXL_PCT_NONF_IPV4_TCP_SYN_NOACK |	\
-	IXL_PCT_NONF_IPV6_TCP_SYN_NOACK
+#include <dev/pci/if_ixlvar.h>
 
 #define IXL_HMC_ROUNDUP			512
 #define IXL_HMC_PGSIZE			4096
diff --git a/sys/dev/pci/if_ixlvar.h b/sys/dev/pci/if_ixlvar.h
new file mode 100644
index 00000000000..7361be66bd4
--- /dev/null
+++ b/sys/dev/pci/if_ixlvar.h
@@ -0,0 +1,102 @@
+/*	$Id$ */
+
+/*
+ * Copyright (c) 2013-2015, Intel Corporation
+ * All rights reserved.
+
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice,
+ *     this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *  3. Neither the name of the Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2016,2017 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _IXL_VAR_H_
+#define _IXL_VAR_H_
+
+/* Packet Classifier Types for filters */
+/* bits 0-28 are reserved for future use */
+#define IXL_PCT_NONF_IPV4_UDP_UCAST	(1ULL << 29)	/* 722 */
+#define IXL_PCT_NONF_IPV4_UDP_MCAST	(1ULL << 30)	/* 722 */
+#define IXL_PCT_NONF_IPV4_UDP		(1ULL << 31)
+#define IXL_PCT_NONF_IPV4_TCP_SYN_NOACK	(1ULL << 32)	/* 722 */
+#define IXL_PCT_NONF_IPV4_TCP		(1ULL << 33)
+#define IXL_PCT_NONF_IPV4_SCTP		(1ULL << 34)
+#define IXL_PCT_NONF_IPV4_OTHER		(1ULL << 35)
+#define IXL_PCT_FRAG_IPV4		(1ULL << 36)
+/* bits 37-38 are reserved for future use */
+#define IXL_PCT_NONF_IPV6_UDP_UCAST	(1ULL << 39)	/* 722 */
+#define IXL_PCT_NONF_IPV6_UDP_MCAST	(1ULL << 40)	/* 722 */
+#define IXL_PCT_NONF_IPV6_UDP		(1ULL << 41)
+#define IXL_PCT_NONF_IPV6_TCP_SYN_NOACK	(1ULL << 42)	/* 722 */
+#define IXL_PCT_NONF_IPV6_TCP		(1ULL << 43)
+#define IXL_PCT_NONF_IPV6_SCTP		(1ULL << 44)
+#define IXL_PCT_NONF_IPV6_OTHER		(1ULL << 45)
+#define IXL_PCT_FRAG_IPV6		(1ULL << 46)
+/* bit 47 is reserved for future use */
+#define IXL_PCT_FCOE_OX			(1ULL << 48)
+#define IXL_PCT_FCOE_RX			(1ULL << 49)
+#define IXL_PCT_FCOE_OTHER		(1ULL << 50)
+/* bits 51-62 are reserved for future use */
+#define IXL_PCT_L2_PAYLOAD		(1ULL << 63)
+
+#define IXL_RSS_HENA_BASE_DEFAULT		\
+	IXL_PCT_NONF_IPV4_UDP |			\
+	IXL_PCT_NONF_IPV4_TCP |			\
+	IXL_PCT_NONF_IPV4_SCTP |		\
+	IXL_PCT_NONF_IPV4_OTHER |		\
+	IXL_PCT_FRAG_IPV4 |			\
+	IXL_PCT_NONF_IPV6_UDP |			\
+	IXL_PCT_NONF_IPV6_TCP |			\
+	IXL_PCT_NONF_IPV6_SCTP |		\
+	IXL_PCT_NONF_IPV6_OTHER |		\
+	IXL_PCT_FRAG_IPV6 |			\
+	IXL_PCT_L2_PAYLOAD
+
+#define IXL_RSS_HENA_BASE_710		IXL_RSS_HENA_BASE_DEFAULT
+#define IXL_RSS_HENA_BASE_722		IXL_RSS_HENA_BASE_DEFAULT | \
+	IXL_PCT_NONF_IPV4_UDP_UCAST |		\
+	IXL_PCT_NONF_IPV4_UDP_MCAST |		\
+	IXL_PCT_NONF_IPV6_UDP_UCAST |		\
+	IXL_PCT_NONF_IPV6_UDP_MCAST |		\
+	IXL_PCT_NONF_IPV4_TCP_SYN_NOACK |	\
+	IXL_PCT_NONF_IPV6_TCP_SYN_NOACK
+
+#endif /* _IXL_VAR_H_ */
+

Here is the PCI bus error handling patch that I proposed in the following mail.

https://marc.info/?l=openbsd-tech&m=172723210819245&w=2

diff --git a/sys/dev/pci/if_iavf.c b/sys/dev/pci/if_iavf.c
index 1226b953821..f5077ff0b45 100644
--- a/sys/dev/pci/if_iavf.c
+++ b/sys/dev/pci/if_iavf.c
@@ -616,6 +616,7 @@ struct iavf_softc {
 	uint32_t		 sc_major_ver;
 	uint32_t		 sc_minor_ver;
 
+	int			 sc_if_attached;
 	int			 sc_got_vf_resources;
 	int			 sc_got_irq_map;
 	uint32_t		 sc_vf_id;
@@ -1078,6 +1079,7 @@ iavf_attach(struct device *parent, struct device *self, void *aux)
 
 	if_attach_queues(ifp, iavf_nqueues(sc));
 	if_attach_iqueues(ifp, iavf_nqueues(sc));
+	sc->sc_if_attached++;
 
 	iavf_intr_enable(sc);
 
@@ -1623,7 +1625,8 @@ iavf_reset(void *xsc)
 	link_state = ifp->if_link_state;
 	if (ifp->if_link_state != LINK_STATE_DOWN) {
 		ifp->if_link_state = LINK_STATE_DOWN;
-		if_link_state_change(ifp);
+		if (sc->sc_if_attached)
+			if_link_state_change(ifp);
 	}
 
 	up = 0;

-- 
Yuichiro NAITO (naito.yuichiro@gmail.com)