hardware LRO support for bnxt(4)
This patch adds support for hardware LRO to bnxt(4). With it, all
currently supported devices start using hardware LRO.
The software LRO path is now unused, but it is kept intact because we
will still need it for the newer devices ("P5" and up) which jmatthew@
is planning to add.
Buffers on TPA-enabled rings can be completed out of order. Rather
than freeing consumed slots on aggregation rings sequentially, we must
maintain a bitmap which keeps track of free slots. This avoids freeing
slots the device is still using.
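To illustrate the idea outside the driver, here is a minimal
standalone sketch of that bookkeeping (plain C, illustrative names,
not part of the diff below): a set bit marks a free slot, the fill
path claims a free slot wherever one is found, and completions
release slots in whatever order the device finishes with them.

/*
 * Standalone sketch, not driver code: a set bit in freemap means
 * "slot is free".
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>	/* ffs() */

#define NSLOTS	32
#define NBBY	8	/* bits per byte, as in <sys/param.h> */
#define setbit(a, i)	((a)[(i) / NBBY] |= 1 << ((i) % NBBY))
#define clrbit(a, i)	((a)[(i) / NBBY] &= ~(1 << ((i) % NBBY)))
#define isset(a, i)	((a)[(i) / NBBY] & (1 << ((i) % NBBY)))

static uint8_t freemap[NSLOTS / NBBY];

/* Find any free slot by scanning the bitmap bytewise; -1 if full. */
static int
next_free(void)
{
	unsigned int i;
	int bit;

	for (i = 0; i < sizeof(freemap); i++) {
		bit = ffs(freemap[i]);
		if (bit > 0)
			return (bit - 1 + i * NBBY);
	}
	return (-1);
}

int
main(void)
{
	int a, b, c;

	memset(freemap, 0xff, sizeof(freemap));	/* all slots start free */

	/* Fill path: claim slots before handing them to the device. */
	a = next_free(); clrbit(freemap, a);
	b = next_free(); clrbit(freemap, b);
	c = next_free(); clrbit(freemap, c);

	/* The device completes b first; only b becomes reusable. */
	setbit(freemap, b);
	assert(next_free() == b);
	assert(!isset(freemap, a) && !isset(freemap, c));

	return (0);
}

The diff uses the kernel's setbit/clrbit/isclr macros for the same
invariant, and bnxt_next_free_ag_slot() starts scanning at the current
producer index instead of at slot zero.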
Tested on:
bnxt0 at pci14 dev 0 function 0 "Broadcom BCM57414" rev 0x01: fw ver 234.0.150, msix, 8 queues, address xx:xx:xx:xx:xx:xx
357020 1238298984 9906.392 100.00%
Conn: 1 Mbps: 9906.392 Peak Mbps: 9916.791 Avg Mbps: 9906.392
0 input LRO generated packets glued in software
143069252 input LRO generated packets from device
16803037 input LRO small packets received from network
0 bad LRO packets dropped
This patch requires my recently committed "bnxt pci id fix" patch
to compile.
ok?
M sys/dev/pci/if_bnxt.c | 352+ 80-
1 file changed, 352 insertions(+), 80 deletions(-)
commit - 79c7ebb7c5ed81e0595f0844e09ea98df93f37e1
commit + 1337fda361508a90fb10fd17a1e4af6e8c2dbad8
blob - cfa5c59544fcf775cb7b29dddb2090b439ba4104
blob + 36b0f798db8dc834291959fc98ad1e40c62a410e
--- sys/dev/pci/if_bnxt.c
+++ sys/dev/pci/if_bnxt.c
@@ -97,6 +97,8 @@
#define BNXT_MAX_TX_SEGS 31
#define BNXT_TX_SLOTS(bs) (bs->bs_map->dm_nsegs + 1)
+#define BNXT_MAX_RX_SEGS 32
+
#define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input)
#define BNXT_HWRM_LOCK_INIT(_sc, _name) \
@@ -111,6 +113,7 @@
#define BNXT_FLAG_WOL_CAP 0x0004
#define BNXT_FLAG_SHORT_CMD 0x0008
#define BNXT_FLAG_MSIX 0x0010
+#define BNXT_FLAG_TPA 0x0020 /* hardware LRO */
/* NVRam stuff has a five minute timeout */
#define BNXT_NVM_TIMEO (5 * 60 * 1000)
@@ -185,6 +188,11 @@ struct bnxt_dmamem {
#define BNXT_DMA_DVA(_bdm) ((u_int64_t)(_bdm)->bdm_map->dm_segs[0].ds_addr)
#define BNXT_DMA_KVA(_bdm) ((void *)(_bdm)->bdm_kva)
+struct bnxt_full_tpa_start {
+ struct rx_tpa_start_cmpl low;
+ struct rx_tpa_start_cmpl_hi high;
+};
+
struct bnxt_rx_queue {
struct bnxt_softc *rx_softc;
struct ifiqueue *rx_ifiq;
@@ -197,8 +205,9 @@ struct bnxt_rx_queue {
int rx_prod;
int rx_cons;
int rx_ag_prod;
- int rx_ag_cons;
+ uint8_t *rx_ag_freemap;
struct timeout rx_refill;
+ struct bnxt_full_tpa_start *rx_tpas;
};
struct bnxt_tx_queue {
@@ -224,6 +233,13 @@ struct bnxt_queue {
struct bnxt_grp_info q_rg;
};
+struct bnxt_hw_lro {
+ uint16_t is_mode_gro;
+ uint16_t max_agg_segs;
+ uint16_t max_aggs;
+ uint32_t min_agg_len;
+};
+
struct bnxt_softc {
struct device sc_dev;
struct arpcom sc_ac;
@@ -263,6 +279,8 @@ struct bnxt_softc {
int sc_nqueues;
struct intrmap *sc_intrmap;
struct bnxt_queue sc_queues[BNXT_MAX_QUEUES];
+
+ struct bnxt_hw_lro sc_hw_lro;
};
#define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)
@@ -317,12 +335,18 @@ void bnxt_write_tx_doorbell(struct bnxt_softc *, stru
int bnxt_rx_fill(struct bnxt_queue *);
int bnxt_rx_fill_ag(struct bnxt_queue *);
-u_int bnxt_rx_fill_slots(struct bnxt_softc *, struct bnxt_ring *, void *,
- struct bnxt_slot *, uint *, int, uint16_t, u_int);
+u_int bnxt_rx_fill_slots(struct bnxt_softc *, struct bnxt_ring *,
+ void *, struct bnxt_slot *, uint *, uint8_t *, uint32_t,
+ int, uint16_t, u_int);
void bnxt_refill(void *);
int bnxt_rx(struct bnxt_softc *, struct bnxt_rx_queue *,
struct bnxt_cp_ring *, struct mbuf_list *, struct mbuf_list *,
int *, int *, struct cmpl_base *);
+void bnxt_rx_tpa_start(struct bnxt_softc *, struct bnxt_rx_queue *,
+ struct bnxt_cp_ring *, struct cmpl_base *);
+int bnxt_rx_tpa(struct bnxt_softc *, struct bnxt_rx_queue *,
+ struct bnxt_cp_ring *, struct mbuf_list *, int *, int *,
+ struct cmpl_base *);
void bnxt_txeof(struct bnxt_softc *, struct bnxt_tx_queue *, int *,
struct cmpl_base *);
@@ -372,6 +396,7 @@ int bnxt_hwrm_free_filter(struct bnxt_softc *,
int bnxt_hwrm_vnic_rss_cfg(struct bnxt_softc *,
struct bnxt_vnic_info *, uint32_t, daddr_t, daddr_t);
int bnxt_cfg_async_cr(struct bnxt_softc *, struct bnxt_cp_ring *);
+int bnxt_hwrm_vnic_tpa_cfg(struct bnxt_softc *);
int bnxt_hwrm_nvm_get_dev_info(struct bnxt_softc *, uint16_t *,
uint16_t *, uint32_t *, uint32_t *, uint32_t *, uint32_t *);
int bnxt_hwrm_port_phy_qcfg(struct bnxt_softc *,
@@ -385,9 +410,6 @@ int bnxt_hwrm_func_drv_unrgtr(struct bnxt_softc *softc
int bnxt_hwrm_port_qstats(struct bnxt_softc *softc);
-
-int bnxt_hwrm_vnic_tpa_cfg(struct bnxt_softc *softc);
-void bnxt_validate_hw_lro_settings(struct bnxt_softc *softc);
int bnxt_hwrm_fw_reset(struct bnxt_softc *softc, uint8_t processor,
uint8_t *selfreset);
int bnxt_hwrm_fw_qstatus(struct bnxt_softc *softc, uint8_t type,
@@ -652,6 +674,39 @@ bnxt_attach(struct device *parent, struct device *self
#endif
ifp->if_capabilities |= IFCAP_LRO;
+ switch (PCI_PRODUCT(pa->pa_id)) {
+ case PCI_PRODUCT_BROADCOM_BCM57301:
+ case PCI_PRODUCT_BROADCOM_BCM57302:
+ case PCI_PRODUCT_BROADCOM_BCM57304:
+ case PCI_PRODUCT_BROADCOM_BCM57311:
+ case PCI_PRODUCT_BROADCOM_BCM57312:
+ case PCI_PRODUCT_BROADCOM_BCM57314:
+ case PCI_PRODUCT_BROADCOM_BCM57402:
+ case PCI_PRODUCT_BROADCOM_BCM57404:
+ case PCI_PRODUCT_BROADCOM_BCM57406:
+ case PCI_PRODUCT_BROADCOM_BCM57407:
+ case PCI_PRODUCT_BROADCOM_BCM57412:
+ case PCI_PRODUCT_BROADCOM_BCM57414:
+ case PCI_PRODUCT_BROADCOM_BCM57416:
+ case PCI_PRODUCT_BROADCOM_BCM57416_SFP:
+ case PCI_PRODUCT_BROADCOM_BCM57417:
+ case PCI_PRODUCT_BROADCOM_BCM57417_SFP:
+ sc->sc_flags |= BNXT_FLAG_TPA;
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * TPA aggregation thresholds should be kept in sync with
+ * if_rxr_init() such that the ag ring's low watermark provides
+ * sufficient space for all aggregations allowed here.
+ */
+ sc->sc_hw_lro.is_mode_gro = 0;
+ sc->sc_hw_lro.max_agg_segs = 5; /* 2^5 = 32 segs = BNXT_MAX_RX_SEGS */
+ sc->sc_hw_lro.max_aggs = HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_4;
+ sc->sc_hw_lro.min_agg_len = 512;
+
ifq_init_maxlen(&ifp->if_snd, 1024); /* ? */
ifmedia_init(&sc->sc_media, IFM_IMASK, bnxt_media_change,
@@ -791,6 +846,7 @@ bnxt_queue_up(struct bnxt_softc *sc, struct bnxt_queue
struct bnxt_tx_queue *tx = &bq->q_tx;
struct bnxt_grp_info *rg = &bq->q_rg;
struct bnxt_slot *bs;
+ size_t num_tpa_agg_ids = 0;
int i;
tx->tx_ring_mem = bnxt_dmamem_alloc(sc, PAGE_SIZE);
@@ -920,23 +976,42 @@ bnxt_queue_up(struct bnxt_softc *sc, struct bnxt_queue
goto destroy_rx_slots;
}
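+ /* One bit per ag ring slot; a set bit means the slot is free. */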
+ rx->rx_ag_freemap = malloc(rx->rx_ag_ring.ring_size / 8,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+ if (rx->rx_ag_freemap == NULL) {
+ printf("%s: failed to allocate rx ag freemap\n", DEVNAME(sc));
+ goto destroy_rx_ag_slots;
+ }
+ memset(rx->rx_ag_freemap, 0xff, rx->rx_ag_ring.ring_size / 8);
+
for (i = 0; i < rx->rx_ag_ring.ring_size; i++) {
bs = &rx->rx_ag_slots[i];
- if (bus_dmamap_create(sc->sc_dmat, BNXT_AG_BUFFER_SIZE, 1,
- BNXT_AG_BUFFER_SIZE, 0,
+ if (bus_dmamap_create(sc->sc_dmat, BNXT_AG_BUFFER_SIZE,
+ BNXT_MAX_RX_SEGS, BNXT_AG_BUFFER_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
&bs->bs_map) != 0) {
printf("%s: failed to allocate rx ag dma maps\n",
DEVNAME(sc));
- goto destroy_rx_ag_slots;
+ goto destroy_rx_ag_freemap;
}
}
+ if ((sc->sc_flags & BNXT_FLAG_TPA) && (ifp->if_xflags & IFXF_LRO)) {
+ num_tpa_agg_ids = (RX_TPA_START_CMPL_AGG_ID_MASK >>
+ RX_TPA_START_CMPL_AGG_ID_SFT);
+ rx->rx_tpas = mallocarray(sizeof(rx->rx_tpas[0]),
+ num_tpa_agg_ids, M_DEVBUF, M_WAITOK | M_ZERO);
+ if (rx->rx_tpas == NULL) {
+ printf("%s: failed to allocate TPA\n", DEVNAME(sc));
+ goto destroy_rx_ag_freemap;
+ }
+ }
+
tx->tx_slots = mallocarray(sizeof(*bs), tx->tx_ring.ring_size,
M_DEVBUF, M_WAITOK | M_ZERO);
if (tx->tx_slots == NULL) {
printf("%s: failed to allocate tx slots\n", DEVNAME(sc));
- goto destroy_rx_ag_slots;
+ goto destroy_rx_tpas;
}
for (i = 0; i < tx->tx_ring.ring_size; i++) {
@@ -958,12 +1033,12 @@ bnxt_queue_up(struct bnxt_softc *sc, struct bnxt_queue
* once the whole ring has been used once, we should be able to back off
* to 2 or so slots, but we currently don't have a way of doing that.
*/
- if_rxr_init(&rx->rxr[0], 32, rx->rx_ring.ring_size - 1);
- if_rxr_init(&rx->rxr[1], 32, rx->rx_ag_ring.ring_size - 1);
+ if_rxr_init(&rx->rxr[0], BNXT_MAX_RX_SEGS, rx->rx_ring.ring_size - 1);
+ if_rxr_init(&rx->rxr[1], BNXT_MAX_RX_SEGS * 4,
+ rx->rx_ag_ring.ring_size - 1);
rx->rx_prod = 0;
rx->rx_cons = 0;
rx->rx_ag_prod = 0;
- rx->rx_ag_cons = 0;
bnxt_rx_fill(bq);
bnxt_rx_fill_ag(bq);
@@ -980,6 +1055,12 @@ destroy_tx_slots:
tx->tx_slots = NULL;
i = rx->rx_ag_ring.ring_size;
+destroy_rx_tpas:
+ free(rx->rx_tpas, M_DEVBUF, num_tpa_agg_ids * sizeof(rx->rx_tpas[0]));
+ rx->rx_tpas = NULL;
+destroy_rx_ag_freemap:
+ free(rx->rx_ag_freemap, M_DEVBUF, rx->rx_ag_ring.ring_size / 8);
+ rx->rx_ag_freemap = NULL;
destroy_rx_ag_slots:
bnxt_free_slots(sc, rx->rx_ag_slots, i, rx->rx_ag_ring.ring_size);
rx->rx_ag_slots = NULL;
@@ -1031,6 +1112,9 @@ bnxt_queue_down(struct bnxt_softc *sc, struct bnxt_que
tx->tx_ring.ring_size);
tx->tx_slots = NULL;
+ free(rx->rx_ag_freemap, M_DEVBUF, rx->rx_ag_ring.ring_size / 8);
+ rx->rx_ag_freemap = NULL;
+
+ free(rx->rx_tpas, M_DEVBUF, (RX_TPA_START_CMPL_AGG_ID_MASK >>
+     RX_TPA_START_CMPL_AGG_ID_SFT) * sizeof(rx->rx_tpas[0]));
+ rx->rx_tpas = NULL;
+
bnxt_free_slots(sc, rx->rx_ag_slots, rx->rx_ag_ring.ring_size,
rx->rx_ag_ring.ring_size);
rx->rx_ag_slots = NULL;
@@ -1161,6 +1245,13 @@ bnxt_up(struct bnxt_softc *sc)
}
}
+ if (bnxt_hwrm_vnic_tpa_cfg(sc) != 0) {
+ printf("%s: failed to set hardware LRO config\n",
+ DEVNAME(sc));
+ ret = EIO;
+ goto dealloc_vnic;
+ }
+
bnxt_iff(sc);
SET(ifp->if_flags, IFF_RUNNING);
@@ -1301,10 +1392,12 @@ bnxt_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
}
if (error == ENETRESET) {
- if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) ==
- (IFF_UP | IFF_RUNNING))
- bnxt_iff(sc);
error = 0;
+ if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) ==
+ (IFF_UP | IFF_RUNNING)) {
+ bnxt_down(sc);
+ error = bnxt_up(sc);
+ }
}
splx(s);
@@ -1650,6 +1743,15 @@ bnxt_intr(void *xq)
if (ISSET(ifp->if_flags, IFF_RUNNING))
bnxt_txeof(sc, tx, &txfree, cmpl);
break;
+ case CMPL_BASE_TYPE_RX_TPA_START:
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ bnxt_rx_tpa_start(sc, rx, cpr, cmpl);
+ break;
+ case CMPL_BASE_TYPE_RX_TPA_END:
+ if (ISSET(ifp->if_flags, IFF_RUNNING))
+ rollback = bnxt_rx_tpa(sc, rx, cpr, &ml,
+ &rxfree, &agfree, cmpl);
+ break;
default:
printf("%s: unexpected completion type %u\n",
DEVNAME(sc), type);
@@ -1673,15 +1775,12 @@ bnxt_intr(void *xq)
if (rxfree != 0) {
int livelocked = 0;
+ int ag_needs_filling = 0;
rx->rx_cons += rxfree;
if (rx->rx_cons >= rx->rx_ring.ring_size)
rx->rx_cons -= rx->rx_ring.ring_size;
- rx->rx_ag_cons += agfree;
- if (rx->rx_ag_cons >= rx->rx_ag_ring.ring_size)
- rx->rx_ag_cons -= rx->rx_ag_ring.ring_size;
-
if_rxr_put(&rx->rxr[0], rxfree);
if_rxr_put(&rx->rxr[1], agfree);
@@ -1696,9 +1795,13 @@ bnxt_intr(void *xq)
}
bnxt_rx_fill(q);
- bnxt_rx_fill_ag(q);
- if ((rx->rx_cons == rx->rx_prod) ||
- (rx->rx_ag_cons == rx->rx_ag_prod))
+ if (if_rxr_needrefill(&rx->rxr[1]))
+ ag_needs_filling = 1;
+
+ if (ag_needs_filling && bnxt_rx_fill_ag(q) == 0)
+ ag_needs_filling = 0;
+
+ if (rx->rx_cons == rx->rx_prod || ag_needs_filling)
timeout_add(&rx->rx_refill, 0);
}
if (txfree != 0) {
@@ -2211,10 +2314,30 @@ bnxt_write_tx_doorbell(struct bnxt_softc *sc, struct b
htole32(val));
}
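+/*
+ * Scan the free-slot bitmap bytewise, starting at the byte that holds
+ * slot p and wrapping around once.  A set bit marks a free slot.
+ * Returns the index of a free slot, or -1 if the map is exhausted.
+ */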
+int
+bnxt_next_free_ag_slot(uint8_t *freemap, uint32_t mapsize, uint p)
+{
+ int idx;
+ uint32_t n = 0;
+ uint i = p / 8;
+
+ for (n = 0; n < mapsize; n++) {
+ idx = ffs(freemap[i]);
+ if (idx > 0)
+ return ((idx - 1) + (i * 8));
+ i++;
+ if (i >= mapsize)
+ i = 0;
+ }
+
+ return -1;
+}
+
u_int
-bnxt_rx_fill_slots(struct bnxt_softc *sc, struct bnxt_ring *ring, void *ring_mem,
- struct bnxt_slot *slots, uint *prod, int bufsize, uint16_t bdtype,
- u_int nslots)
+bnxt_rx_fill_slots(struct bnxt_softc *sc, struct bnxt_ring *ring,
+ void *ring_mem, struct bnxt_slot *slots, uint *prod,
+ uint8_t *freemap, uint32_t mapsize,
+ int bufsize, uint16_t bdtype, u_int nslots)
{
struct rx_prod_pkt_bd *rxring;
struct bnxt_slot *bs;
@@ -2224,6 +2347,12 @@ bnxt_rx_fill_slots(struct bnxt_softc *sc, struct bnxt_
rxring = (struct rx_prod_pkt_bd *)ring_mem;
p = *prod;
for (fills = 0; fills < nslots; fills++) {
+ if (freemap != NULL && isclr(freemap, p)) {
+ p = bnxt_next_free_ag_slot(freemap, mapsize, p);
+ if (p == -1)
+ return (nslots - fills);
+ }
+
bs = &slots[p];
m = MCLGETL(NULL, M_DONTWAIT, bufsize);
if (m == NULL)
@@ -2242,6 +2371,9 @@ bnxt_rx_fill_slots(struct bnxt_softc *sc, struct bnxt_
rxring[p].opaque = p;
rxring[p].addr = htole64(bs->bs_map->dm_segs[0].ds_addr);
+ if (freemap != NULL)
+ clrbit(freemap, p);
+
if (++p >= ring->ring_size)
p = 0;
}
@@ -2264,8 +2396,8 @@ bnxt_rx_fill(struct bnxt_queue *q)
slots = if_rxr_get(&rx->rxr[0], rx->rx_ring.ring_size);
if (slots > 0) {
slots = bnxt_rx_fill_slots(sc, &rx->rx_ring,
- BNXT_DMA_KVA(rx->rx_ring_mem), rx->rx_slots,
- &rx->rx_prod, MCLBYTES,
+ BNXT_DMA_KVA(rx->rx_ring_mem), rx->rx_slots, &rx->rx_prod,
+ NULL, 0, MCLBYTES,
RX_PROD_PKT_BD_TYPE_RX_PROD_PKT, slots);
if_rxr_put(&rx->rxr[0], slots);
} else
@@ -2279,19 +2411,23 @@ bnxt_rx_fill_ag(struct bnxt_queue *q)
{
struct bnxt_rx_queue *rx = &q->q_rx;
struct bnxt_softc *sc = q->q_sc;
- u_int slots;
+ u_int slots, unfilled_slots;
int rv = 0;
slots = if_rxr_get(&rx->rxr[1], rx->rx_ag_ring.ring_size);
if (slots > 0) {
- slots = bnxt_rx_fill_slots(sc, &rx->rx_ag_ring,
+ unfilled_slots = bnxt_rx_fill_slots(sc, &rx->rx_ag_ring,
BNXT_DMA_KVA(rx->rx_ring_mem) + PAGE_SIZE,
rx->rx_ag_slots, &rx->rx_ag_prod,
+ rx->rx_ag_freemap, rx->rx_ag_ring.ring_size / 8,
BNXT_AG_BUFFER_SIZE,
RX_PROD_AGG_BD_TYPE_RX_PROD_AGG, slots);
- if_rxr_put(&rx->rxr[1], slots);
- } else
+ if_rxr_put(&rx->rxr[1], unfilled_slots);
+ if (unfilled_slots == slots)
+ rv = 1;
+ } else {
rv = 1;
+ }
return (rv);
}
@@ -2301,15 +2437,18 @@ bnxt_refill(void *xq)
{
struct bnxt_queue *q = xq;
struct bnxt_rx_queue *rx = &q->q_rx;
+ int ag_needs_filling = 0;
if (rx->rx_cons == rx->rx_prod)
bnxt_rx_fill(q);
- if (rx->rx_ag_cons == rx->rx_ag_prod)
- bnxt_rx_fill_ag(q);
+ if (if_rxr_needrefill(&rx->rxr[1]))
+ ag_needs_filling = 1;
- if ((rx->rx_cons == rx->rx_prod) ||
- (rx->rx_ag_cons == rx->rx_ag_prod))
+ if (ag_needs_filling && bnxt_rx_fill_ag(q) == 0)
+ ag_needs_filling = 0;
+
+ if (rx->rx_cons == rx->rx_prod || ag_needs_filling)
timeout_add(&rx->rx_refill, 1);
}
@@ -2318,29 +2457,25 @@ bnxt_rx(struct bnxt_softc *sc, struct bnxt_rx_queue *r
struct bnxt_cp_ring *cpr, struct mbuf_list *ml, struct mbuf_list *mltcp,
int *slots, int *agslots, struct cmpl_base *cmpl)
{
- struct mbuf *m, *am;
+ struct mbuf *m, *am, *aprev = NULL;
struct bnxt_slot *bs;
struct rx_pkt_cmpl *rxlo = (struct rx_pkt_cmpl *)cmpl;
struct rx_pkt_cmpl_hi *rxhi;
- struct rx_abuf_cmpl *ag;
+ struct rx_abuf_cmpl *ag = NULL;
uint32_t flags;
uint16_t errors;
+ uint8_t num_agg;
+ int i;
+ num_agg = (rxlo->agg_bufs_v1 & RX_PKT_CMPL_AGG_BUFS_MASK) >>
+ RX_PKT_CMPL_AGG_BUFS_SFT;
+
/* second part of the rx completion */
rxhi = (struct rx_pkt_cmpl_hi *)bnxt_cpr_next_cmpl(sc, cpr);
if (rxhi == NULL) {
return (1);
}
- /* packets over 2k in size use an aggregation buffer completion too */
- ag = NULL;
- if ((rxlo->agg_bufs_v1 >> RX_PKT_CMPL_AGG_BUFS_SFT) != 0) {
- ag = (struct rx_abuf_cmpl *)bnxt_cpr_next_cmpl(sc, cpr);
- if (ag == NULL) {
- return (1);
- }
- }
-
bs = &rx->rx_slots[rxlo->opaque];
bus_dmamap_sync(sc->sc_dmat, bs->bs_map, 0, bs->bs_map->dm_mapsize,
BUS_DMASYNC_POSTREAD);
@@ -2376,22 +2511,34 @@ bnxt_rx(struct bnxt_softc *sc, struct bnxt_rx_queue *r
m->m_pkthdr.csum_flags |= M_FLOWID;
}
- if (ag != NULL) {
+ for (i = 0; i < num_agg; i++) {
+ ag = (struct rx_abuf_cmpl *)bnxt_cpr_next_cmpl(sc, cpr);
+ if (ag == NULL)
+ break;
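+ /* Skip slots that are already marked free. */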
+ if (isset(rx->rx_ag_freemap, ag->opaque))
+ continue;
bs = &rx->rx_ag_slots[ag->opaque];
bus_dmamap_sync(sc->sc_dmat, bs->bs_map, 0,
bs->bs_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->sc_dmat, bs->bs_map);
+ setbit(rx->rx_ag_freemap, ag->opaque);
+
am = bs->bs_m;
bs->bs_m = NULL;
am->m_len = letoh16(ag->len);
- m->m_next = am;
+ if (aprev == NULL)
+ m->m_next = am;
+ else
+ aprev->m_next = am;
+ aprev = am;
m->m_pkthdr.len += am->m_len;
(*agslots)++;
}
#ifndef SMALL_KERNEL
- if (ISSET(sc->sc_ac.ac_if.if_xflags, IFXF_LRO) &&
+ if ((sc->sc_flags & BNXT_FLAG_TPA) == 0 &&
+ ISSET(sc->sc_ac.ac_if.if_xflags, IFXF_LRO) &&
((lemtoh16(&rxlo->flags_type) & RX_PKT_CMPL_FLAGS_ITYPE_TCP) ==
RX_PKT_CMPL_FLAGS_ITYPE_TCP))
tcp_softlro_glue(mltcp, m, &sc->sc_ac.ac_if);
@@ -2403,6 +2550,147 @@ bnxt_rx(struct bnxt_softc *sc, struct bnxt_rx_queue *r
}
void
+bnxt_rx_tpa_start(struct bnxt_softc *sc, struct bnxt_rx_queue *rx,
+ struct bnxt_cp_ring *cpr, struct cmpl_base *cmpl)
+{
+ struct rx_tpa_start_cmpl *rtpa = (struct rx_tpa_start_cmpl *)cmpl;
+ struct rx_tpa_start_cmpl_hi *rtpa_hi;
+ uint8_t agg_id;
+
+ /* second part of the rx tpa start completion */
+ rtpa_hi = (struct rx_tpa_start_cmpl_hi *)bnxt_cpr_next_cmpl(sc, cpr);
+ if (rtpa_hi == NULL)
+ return;
+
+ agg_id = (rtpa->agg_id & RX_TPA_START_CMPL_AGG_ID_MASK) >>
+ RX_TPA_START_CMPL_AGG_ID_SFT;
+
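+ /*
+ * TPA_START only announces the aggregation; its buffers are consumed
+ * once the matching TPA_END arrives, so stash both halves of this
+ * completion until then.
+ */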
+ memcpy(&rx->rx_tpas[agg_id].low, rtpa, sizeof(*rtpa));
+ memcpy(&rx->rx_tpas[agg_id].high, rtpa_hi, sizeof(*rtpa_hi));
+}
+
+int
+bnxt_rx_tpa(struct bnxt_softc *sc, struct bnxt_rx_queue *rx,
+ struct bnxt_cp_ring *cpr, struct mbuf_list *ml, int *slots, int *agslots,
+ struct cmpl_base *cmpl)
+{
+ struct mbuf *m, *am, *aprev = NULL;
+ struct bnxt_slot *bs;
+ struct rx_tpa_end_cmpl *rxt = (struct rx_tpa_end_cmpl *)cmpl;
+ struct rx_tpa_end_cmpl_hi *rxthi;
+ struct bnxt_full_tpa_start *tpas;
+ struct rx_abuf_cmpl *ag;
+ uint32_t flags;
+ uint8_t i, num_agg;
+ uint8_t agg_id;
+
+ /* second part of the rx completion */
+ rxthi = (struct rx_tpa_end_cmpl_hi *)bnxt_cpr_next_cmpl(sc, cpr);
+ if (rxthi == NULL)
+ return (1);
+
+ agg_id = (rxt->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK) >>
+ RX_TPA_END_CMPL_AGG_ID_SFT;
+ tpas = &rx->rx_tpas[agg_id];
+
+ num_agg = (rxt->agg_bufs_v1 & RX_TPA_END_CMPL_AGG_BUFS_MASK) >>
+ RX_TPA_END_CMPL_AGG_BUFS_SFT;
+
+ bs = &rx->rx_slots[tpas->low.opaque];
+ bus_dmamap_sync(sc->sc_dmat, bs->bs_map, 0, bs->bs_map->dm_mapsize,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(sc->sc_dmat, bs->bs_map);
+
+ m = bs->bs_m;
+ bs->bs_m = NULL;
+ m->m_pkthdr.len = m->m_len = letoh16(tpas->low.len);
+ (*slots)++;
+
+ /* checksum flags */
+ flags = lemtoh32(&tpas->high.flags2);
+ if (flags & RX_TPA_START_CMPL_FLAGS2_IP_CS_CALC)
+ m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
+
+ if (flags & RX_TPA_START_CMPL_FLAGS2_L4_CS_CALC) {
+ m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK |
+ M_UDP_CSUM_IN_OK;
+ }
+#if NVLAN > 0
+ if ((flags & RX_TPA_START_CMPL_FLAGS2_META_FORMAT_MASK) ==
+ RX_TPA_START_CMPL_FLAGS2_META_FORMAT_VLAN) {
+ m->m_pkthdr.ether_vtag = lemtoh16(&tpas->high.metadata);
+ m->m_flags |= M_VLANTAG;
+ }
+#endif
+
+ if (lemtoh16(&tpas->low.flags_type) &
+ RX_TPA_START_CMPL_FLAGS_RSS_VALID) {
+ m->m_pkthdr.ph_flowid = lemtoh32(&tpas->low.rss_hash);
+ m->m_pkthdr.csum_flags |= M_FLOWID;
+ }
+
+ for (i = 0; i < num_agg; i++) {
+ ag = (struct rx_abuf_cmpl *)bnxt_cpr_next_cmpl(sc, cpr);
+ if (ag == NULL)
+ break;
+ if (isset(rx->rx_ag_freemap, ag->opaque))
+ continue;
+
+ bs = &rx->rx_ag_slots[ag->opaque];
+ bus_dmamap_sync(sc->sc_dmat, bs->bs_map, 0,
+ bs->bs_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(sc->sc_dmat, bs->bs_map);
+
+ setbit(rx->rx_ag_freemap, ag->opaque);
+
+ am = bs->bs_m;
+ bs->bs_m = NULL;
+ am->m_len = letoh16(ag->len);
+ if (aprev == NULL)
+ m->m_next = am;
+ else
+ aprev->m_next = am;
+ aprev = am;
+ m->m_pkthdr.len += am->m_len;
+ (*agslots)++;
+ }
+
+ bs = &rx->rx_slots[rxt->opaque];
+ bus_dmamap_sync(sc->sc_dmat, bs->bs_map, 0, bs->bs_map->dm_mapsize,
+ BUS_DMASYNC_POSTREAD);
+ bus_dmamap_unload(sc->sc_dmat, bs->bs_map);
+
+ am = bs->bs_m;
+ bs->bs_m = NULL;
+ am->m_len = letoh16(rxt->len);
+ if (aprev == NULL)
+ m->m_next = am;
+ else
+ aprev->m_next = am;
+ m->m_pkthdr.len += am->m_len;
+ (*slots)++;
+
+ flags = lemtoh16(&tpas->low.flags_type);
+ if ((flags & RX_TPA_START_CMPL_FLAGS_ITYPE_TCP) ==
+ RX_TPA_START_CMPL_FLAGS_ITYPE_TCP) {
+ uint16_t payload_offset = rxt->payload_offset;
+
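+ /* A payload_offset of 0 encodes 256 (the field is 8 bits wide). */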
+ if (payload_offset == 0)
+ payload_offset = 256;
+
+ if (m->m_pkthdr.len > payload_offset)
+ m->m_pkthdr.ph_mss = m->m_pkthdr.len - payload_offset;
+
+ tcpstat_add(tcps_inhwlro, i + 1);
+ tcpstat_inc(tcps_inpktlro);
+ }
+
+ ml_enqueue(ml, m);
+ return (0);
+}
+
+void
bnxt_txeof(struct bnxt_softc *sc, struct bnxt_tx_queue *tx, int *txfree,
struct cmpl_base *cmpl)
{
@@ -3346,64 +3634,48 @@ bnxt_cfg_async_cr(struct bnxt_softc *softc, struct bnx
return rc;
}
-#if 0
-
-void
-bnxt_validate_hw_lro_settings(struct bnxt_softc *softc)
-{
- softc->hw_lro.enable = min(softc->hw_lro.enable, 1);
-
- softc->hw_lro.is_mode_gro = min(softc->hw_lro.is_mode_gro, 1);
-
- softc->hw_lro.max_agg_segs = min(softc->hw_lro.max_agg_segs,
- HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX);
-
- softc->hw_lro.max_aggs = min(softc->hw_lro.max_aggs,
- HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX);
-
- softc->hw_lro.min_agg_len = min(softc->hw_lro.min_agg_len, BNXT_MAX_MTU);
-}
-
int
bnxt_hwrm_vnic_tpa_cfg(struct bnxt_softc *softc)
{
+ struct ifnet *ifp = &softc->sc_ac.ac_if;
struct hwrm_vnic_tpa_cfg_input req = {0};
uint32_t flags;
- if (softc->vnic_info.id == (uint16_t) HWRM_NA_SIGNATURE) {
+ if (softc->sc_vnic.id == (uint16_t) HWRM_NA_SIGNATURE ||
+ (softc->sc_flags & BNXT_FLAG_TPA) == 0)
return 0;
- }
bnxt_hwrm_cmd_hdr_init(softc, &req, HWRM_VNIC_TPA_CFG);
- if (softc->hw_lro.enable) {
+ if (ifp->if_xflags & IFXF_LRO) {
flags = HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA |
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA |
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN |
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ;
- if (softc->hw_lro.is_mode_gro)
+ if (softc->sc_hw_lro.is_mode_gro)
flags |= HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO;
else
flags |= HWRM_VNIC_TPA_CFG_INPUT_FLAGS_RSC_WND_UPDATE;
req.flags = htole32(flags);
- req.enables = htole32(HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS |
- HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS |
- HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN);
+ req.enables = htole32(
+ HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS |
+ HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS |
+ HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN);
- req.max_agg_segs = htole16(softc->hw_lro.max_agg_segs);
- req.max_aggs = htole16(softc->hw_lro.max_aggs);
- req.min_agg_len = htole32(softc->hw_lro.min_agg_len);
+ req.max_agg_segs = htole16(softc->sc_hw_lro.max_agg_segs);
+ req.max_aggs = htole16(softc->sc_hw_lro.max_aggs);
+ req.min_agg_len = htole32(softc->sc_hw_lro.min_agg_len);
}
- req.vnic_id = htole16(softc->vnic_info.id);
+ req.vnic_id = htole16(softc->sc_vnic.id);
return hwrm_send_message(softc, &req, sizeof(req));
}
-
+#if 0
int
bnxt_hwrm_fw_reset(struct bnxt_softc *softc, uint8_t processor,
uint8_t *selfreset)