Index | Thread | Search

From:
Visa Hankala <visa@hankala.org>
Subject:
Re: octeon: commuliative patch LRO, cnmac queue and softens
To:
"Kirill A. Korinsky" <kirill@korins.ky>
Cc:
tech@openbsd.org
Date:
Mon, 27 Apr 2026 15:14:17 +0000

Download raw body.

Thread
On Sun, Apr 26, 2026 at 10:19:22PM +0200, Kirill A. Korinsky wrote:
> On Sun, 26 Apr 2026 18:22:35 +0200,
> Visa Hankala <visa@hankala.org> wrote:
> > 
> > On Sun, Apr 05, 2026 at 03:46:51PM +0200, Kirill A. Korinsky wrote:
> > > Here is a cumulative diff (LRO + multiple queues) which addresses all your
> > > remarks and excludes the parts already committed by kn@ and me.
> > 
> > Some minor comments below:
> >
> 
> Thanks, I like your idea of simplification for cnmac_nrxgroups.
> 
> The original code was definitely overcomplicated.
> 
> Here is an updated diff which addresses all your remarks and contains only
> the hunks that are not yet committed:

Tested with CN5020, CN6120 and CN7130. TCP/UDP traffic gets distributed
by (saddr, daddr, sport, dport) tuple. ESP uses (saddr, daddr) tuple.
QinQ-tagged traffic gets assigned to core 0, as expected because the
hardware does not know QinQ.

OK visa@

> Index: sys/arch/octeon/dev/cn30xxpip.c
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/octeon/dev/cn30xxpip.c,v
> diff -u -p -r1.11 cn30xxpip.c
> --- sys/arch/octeon/dev/cn30xxpip.c	28 Dec 2022 01:39:21 -0000	1.11
> +++ sys/arch/octeon/dev/cn30xxpip.c	26 Apr 2026 18:36:04 -0000
> @@ -57,6 +57,7 @@ cn30xxpip_init(struct cn30xxpip_attach_a
>  	sc->sc_regt = aa->aa_regt;
>  	sc->sc_tag_type = aa->aa_tag_type;
>  	sc->sc_receive_group = aa->aa_receive_group;
> +	sc->sc_receive_group_order = aa->aa_receive_group_order;
>  	sc->sc_ip_offset = aa->aa_ip_offset;
>  
>  	status = bus_space_map(sc->sc_regt, PIP_BASE, PIP_SIZE, 0,
> @@ -88,6 +89,7 @@ cn30xxpip_port_config(struct cn30xxpip_s
>  	uint64_t prt_cfg;
>  	uint64_t prt_tag;
>  	uint64_t ip_offset;
> +	uint64_t group_mask;
>  
>  	/*
>  	 * Process the headers and place the IP header in the work queue
> @@ -108,22 +110,30 @@ cn30xxpip_port_config(struct cn30xxpip_s
>  	/* SKIP=0 */
>  
>  	prt_tag = 0;
> -	SET(prt_tag, PIP_PRT_TAGN_INC_PRT);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP6_DPRT);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP4_DPRT);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP6_SPRT);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP4_SPRT);
> +	CLR(prt_tag, PIP_PRT_TAGN_INC_VLAN);
> +	CLR(prt_tag, PIP_PRT_TAGN_INC_PRT);
> +	SET(prt_tag, PIP_PRT_TAGN_IP6_DPRT);
> +	SET(prt_tag, PIP_PRT_TAGN_IP4_DPRT);
> +	SET(prt_tag, PIP_PRT_TAGN_IP6_SPRT);
> +	SET(prt_tag, PIP_PRT_TAGN_IP4_SPRT);
>  	CLR(prt_tag, PIP_PRT_TAGN_IP6_NXTH);
>  	CLR(prt_tag, PIP_PRT_TAGN_IP4_PCTL);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP6_DST);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP4_SRC);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP6_SRC);
> -	CLR(prt_tag, PIP_PRT_TAGN_IP4_DST);
> +	SET(prt_tag, PIP_PRT_TAGN_IP6_DST);
> +	SET(prt_tag, PIP_PRT_TAGN_IP4_SRC);
> +	SET(prt_tag, PIP_PRT_TAGN_IP6_SRC);
> +	SET(prt_tag, PIP_PRT_TAGN_IP4_DST);
>  	SET(prt_tag, PIP_PRT_TAGN_TCP6_TAG_ORDERED);
>  	SET(prt_tag, PIP_PRT_TAGN_TCP4_TAG_ORDERED);
>  	SET(prt_tag, PIP_PRT_TAGN_IP6_TAG_ORDERED);
>  	SET(prt_tag, PIP_PRT_TAGN_IP4_TAG_ORDERED);
>  	SET(prt_tag, PIP_PRT_TAGN_NON_TAG_ORDERED);
> +	if (sc->sc_receive_group_order > 0) {
> +		group_mask = ~((1U << sc->sc_receive_group_order) - 1U);
> +		SET(prt_tag, ((uint64_t)sc->sc_receive_group << 36) &
> +		    PIP_PRT_TAGN_GRPTAGBASE);
> +		SET(prt_tag, (group_mask << 32) & PIP_PRT_TAGN_GRPTAGMASK);
> +		SET(prt_tag, PIP_PRT_TAGN_GRPTAG);
> +	}
>  	SET(prt_tag, sc->sc_receive_group & PIP_PRT_TAGN_GRP);
>  
>  	ip_offset = 0;
> Index: sys/arch/octeon/dev/cn30xxpipvar.h
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/octeon/dev/cn30xxpipvar.h,v
> diff -u -p -r1.6 cn30xxpipvar.h
> --- sys/arch/octeon/dev/cn30xxpipvar.h	20 May 2024 23:13:33 -0000	1.6
> +++ sys/arch/octeon/dev/cn30xxpipvar.h	26 Apr 2026 18:36:04 -0000
> @@ -41,6 +41,7 @@ struct cn30xxpip_softc {
>  	bus_space_handle_t	sc_regh_stat;
>  	int			sc_tag_type;
>  	int			sc_receive_group;
> +	int			sc_receive_group_order;
>  	size_t			sc_ip_offset;
>  };
>  
> @@ -50,6 +51,7 @@ struct cn30xxpip_attach_args {
>  	bus_space_tag_t		aa_regt;
>  	int			aa_tag_type;
>  	int			aa_receive_group;
> +	int			aa_receive_group_order;
>  	size_t			aa_ip_offset;
>  };
>  
> Index: sys/arch/octeon/dev/if_cnmac.c
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/octeon/dev/if_cnmac.c,v
> diff -u -p -r1.88 if_cnmac.c
> --- sys/arch/octeon/dev/if_cnmac.c	22 Apr 2026 19:11:04 -0000	1.88
> +++ sys/arch/octeon/dev/if_cnmac.c	26 Apr 2026 19:55:14 -0000
> @@ -159,6 +159,9 @@ int	cnmac_send(struct cnmac_softc *, str
>  int	cnmac_reset(struct cnmac_softc *);
>  int	cnmac_configure(struct cnmac_softc *);
>  int	cnmac_configure_common(struct cnmac_softc *);
> +void	cnmac_rx_groups_init(void);
> +void	cnmac_rx_groups_config(struct cn30xxpow_softc *);
> +void	cnmac_rx_groups_barrier(void);
>  
>  void	cnmac_free_task(void *);
>  void	cnmac_tick_free(void *arg);
> @@ -187,6 +190,15 @@ const struct cfattach cnmac_ca = {
>  
>  struct cfdriver cnmac_cd = { NULL, "cnmac", DV_IFNET };
>  
> +#define CNMAC_PIP_PORT_MAX	64
> +
> +struct cnmac_rx_group {
> +	unsigned int		crg_group;
> +	void			*crg_ih;
> +	char			crg_name[IFNAMSIZ];
> +	struct mbuf_list	crg_rx_batch[CNMAC_PIP_PORT_MAX];
> +} __aligned(CACHELINESIZE);
> +
>  /* ---- buffer management */
>  
>  const struct cnmac_pool_param {
> @@ -209,7 +221,10 @@ uint64_t cnmac_mac_addr = 0;
>  uint32_t cnmac_mac_addr_offset = 0;
>  
>  int	cnmac_mbufs_to_alloc;
> -int	cnmac_npowgroups = 0;
> +unsigned int cnmac_nrxgroups = 0;
> +unsigned int cnmac_nrxgroups_order = 0;
> +struct cnmac_softc *cnmac_port_softc[CNMAC_PIP_PORT_MAX];
> +struct cnmac_rx_group cnmac_rx_groups[OCTEON_POW_GROUP_MAX];
>  
>  void
>  cnmac_buf_init(struct cnmac_softc *sc)
> @@ -230,6 +245,50 @@ cnmac_buf_init(struct cnmac_softc *sc)
>  	}
>  }
>  
> +void
> +cnmac_rx_groups_init(void)
> +{
> +	struct cnmac_rx_group *crg;
> +	unsigned int i, target;
> +
> +	if (cnmac_nrxgroups != 0)
> +		return;
> +
> +	target = min(softnet_count(), OCTEON_POW_GROUP_MAX);
> +	cnmac_nrxgroups_order = fls(target) - 1;
> +	cnmac_nrxgroups = 1U << cnmac_nrxgroups_order;
> +
> +	for (i = 0; i < cnmac_nrxgroups; i++) {
> +		crg = &cnmac_rx_groups[i];
> +		crg->crg_group = i;
> +		snprintf(crg->crg_name, sizeof(crg->crg_name),
> +		    "cnmacrx%u", i);
> +		crg->crg_ih = octeon_intr_establish(POW_WORKQ_IRQ(i),
> +		    IPL_NET | IPL_MPSAFE, cnmac_intr, crg, crg->crg_name);
> +		if (crg->crg_ih == NULL)
> +			panic("%s: could not set up interrupt",
> +			    crg->crg_name);
> +	}
> +}
> +
> +void
> +cnmac_rx_groups_config(struct cn30xxpow_softc *pow)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < cnmac_nrxgroups; i++)
> +		cn30xxpow_config(pow, i);
> +}
> +
> +void
> +cnmac_rx_groups_barrier(void)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < cnmac_nrxgroups; i++)
> +		intr_barrier(cnmac_rx_groups[i].crg_ih);
> +}
> +
>  /* ---- autoconf */
>  
>  int
> @@ -251,11 +310,6 @@ cnmac_attach(struct device *parent, stru
>  	struct cn30xxgmx_attach_args *ga = aux;
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  
> -	if (cnmac_npowgroups >= OCTEON_POW_GROUP_MAX) {
> -		printf(": out of POW groups\n");
> -		return;
> -	}
> -
>  	atomic_add_int(&cnmac_mbufs_to_alloc,
>  	    cnmac_mbuf_alloc(CNMAC_MBUFS_PER_PORT));
>  
> @@ -267,7 +321,6 @@ cnmac_attach(struct device *parent, stru
>  	sc->sc_gmx_port = ga->ga_gmx_port;
>  	sc->sc_smi = ga->ga_smi;
>  	sc->sc_phy_addr = ga->ga_phy_addr;
> -	sc->sc_powgroup = cnmac_npowgroups++;
>  
>  	sc->sc_init_flag = 0;
>  
> @@ -287,6 +340,10 @@ cnmac_attach(struct device *parent, stru
>  	task_set(&sc->sc_free_task, cnmac_free_task, sc);
>  	timeout_set(&sc->sc_tick_misc_ch, cnmac_tick_misc, sc);
>  	timeout_set(&sc->sc_tick_free_ch, cnmac_tick_free, sc);
> +	cnmac_rx_groups_init();
> +	KASSERT(sc->sc_port < nitems(cnmac_port_softc));
> +	KASSERT(cnmac_port_softc[sc->sc_port] == NULL);
> +	cnmac_port_softc[sc->sc_port] = sc;
>  
>  	cn30xxfau_op_init(&sc->sc_fau_done,
>  	    OCTEON_CVMSEG_ETHER_OFFSET(sc->sc_dev.dv_unit, csm_ether_fau_done),
> @@ -331,17 +388,13 @@ cnmac_attach(struct device *parent, stru
>  
>  	if_attach(ifp);
>  	ether_ifattach(ifp);
> +	if_attach_iqueues(ifp, cnmac_nrxgroups);
>  
>  	cnmac_buf_init(sc);
>  
>  #if NKSTAT > 0
>  	cnmac_kstat_attach(sc);
>  #endif
> -
> -	sc->sc_ih = octeon_intr_establish(POW_WORKQ_IRQ(sc->sc_powgroup),
> -	    IPL_NET | IPL_MPSAFE, cnmac_intr, sc, sc->sc_dev.dv_xname);
> -	if (sc->sc_ih == NULL)
> -		panic("%s: could not set up interrupt", sc->sc_dev.dv_xname);
>  }
>  
>  /* ---- submodules */
> @@ -354,7 +407,8 @@ cnmac_pip_init(struct cnmac_softc *sc)
>  	pip_aa.aa_port = sc->sc_port;
>  	pip_aa.aa_regt = sc->sc_regt;
>  	pip_aa.aa_tag_type = POW_TAG_TYPE_ORDERED/* XXX */;
> -	pip_aa.aa_receive_group = sc->sc_powgroup;
> +	pip_aa.aa_receive_group = 0;
> +	pip_aa.aa_receive_group_order = cnmac_nrxgroups_order;
>  	pip_aa.aa_ip_offset = sc->sc_ip_offset;
>  	cn30xxpip_init(&pip_aa, &sc->sc_pip);
>  	cn30xxpip_port_config(sc->sc_pip);
> @@ -1061,7 +1115,7 @@ cnmac_stop(struct ifnet *ifp, int disabl
>  
>  	cn30xxgmx_port_enable(sc->sc_gmx_port, 0);
>  
> -	intr_barrier(sc->sc_ih);
> +	cnmac_rx_groups_barrier();
>  	ifq_barrier(&ifp->if_snd);
>  
>  	ifq_clr_oactive(&ifp->if_snd);
> @@ -1093,7 +1147,7 @@ cnmac_configure(struct cnmac_softc *sc)
>  
>  	cn30xxpko_port_config(sc->sc_pko);
>  	cn30xxpko_port_enable(sc->sc_pko, 1);
> -	cn30xxpow_config(sc->sc_pow, sc->sc_powgroup);
> +	cnmac_rx_groups_config(sc->sc_pow);
>  
>  	cn30xxgmx_port_enable(sc->sc_gmx_port, 1);
>  
> @@ -1247,9 +1301,10 @@ cnmac_recv(struct cnmac_softc *sc, uint6
>  {
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  	struct mbuf *m;
> -	uint64_t word2;
> +	uint64_t word1, word2;
>  	int nmbuf = 0;
>  
> +	word1 = work[1];
>  	word2 = work[2];
>  
>  	if (!(ifp->if_flags & IFF_RUNNING))
> @@ -1267,6 +1322,8 @@ cnmac_recv(struct cnmac_softc *sc, uint6
>  	}
>  
>  	m->m_pkthdr.csum_flags = 0;
> +	m->m_pkthdr.ph_flowid = word1 & PIP_WQE_WORD1_TAG;
> +	SET(m->m_pkthdr.csum_flags, M_FLOWID);
>  	if (__predict_true(!ISSET(word2, PIP_WQE_WORD2_IP_NI))) {
>  		/* Check IP checksum status. */
>  		if (!ISSET(word2, PIP_WQE_WORD2_IP_V6) &&
> @@ -1307,16 +1364,20 @@ drop:
>  int
>  cnmac_intr(void *arg)
>  {
> -	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> -	struct cnmac_softc *sc = arg;
> -	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
> +	struct cnmac_rx_group *crg = arg;
> +	struct cn30xxpow_softc *pow = &cn30xxpow_softc;
> +	struct cnmac_softc *sc;
> +	struct ifnet *ifp;
> +	struct mbuf_list *ml;
>  	uint64_t *work;
> -	uint64_t wqmask = 1ull << sc->sc_powgroup;
> +	uint64_t pending = 0;
> +	uint64_t wqmask = 1ull << crg->crg_group;
>  	uint32_t coreid = octeon_get_coreid();
> -	uint32_t port;
> +	unsigned int port;
> +	unsigned int i;
>  	int nmbuf = 0;
>  
> -	_POW_WR8(sc->sc_pow, POW_PP_GRP_MSK_OFFSET(coreid), wqmask);
> +	_POW_WR8(pow, POW_PP_GRP_MSK_OFFSET(coreid), wqmask);
>  
>  	cn30xxpow_tag_sw_wait();
>  	cn30xxpow_work_request_async(OCTEON_CVMSEG_OFFSET(csm_pow_intr),
> @@ -1333,18 +1394,30 @@ cnmac_intr(void *arg)
>  		    OCTEON_CVMSEG_OFFSET(csm_pow_intr), POW_NO_WAIT);
>  
>  		port = (work[1] & PIP_WQE_WORD1_IPRT) >> 42;
> -		if (port != sc->sc_port) {
> -			printf("%s: unexpected wqe port %u, should be %u\n",
> -			    sc->sc_dev.dv_xname, port, sc->sc_port);
> +		if (port >= nitems(cnmac_port_softc) ||
> +		    (sc = cnmac_port_softc[port]) == NULL) {
> +			printf("%s: unexpected wqe port %u\n",
> +			    crg->crg_name, port);
>  			goto wqe_error;
>  		}
>  
> -		nmbuf += cnmac_recv(sc, work, &ml);
> +		if ((pending & (1ULL << port)) == 0) {
> +			ml_init(&crg->crg_rx_batch[port]);
> +			pending |= 1ULL << port;
> +		}
> +		nmbuf += cnmac_recv(sc, work, &crg->crg_rx_batch[port]);
>  	}
>  
> -	_POW_WR8(sc->sc_pow, POW_WQ_INT_OFFSET, wqmask);
> +	_POW_WR8(pow, POW_WQ_INT_OFFSET, wqmask);
>  
> -	if_input(ifp, &ml);
> +	while (pending) {
> +		i = __builtin_ffsll(pending) - 1;
> +		sc = cnmac_port_softc[i];
> +		ifp = &sc->sc_arpcom.ac_if;
> +		ml = &crg->crg_rx_batch[i];
> +		ifiq_input(ifp->if_iqs[crg->crg_group], ml);
> +		pending &= pending - 1;
> +	}
>  
>  	nmbuf = cnmac_mbuf_alloc(nmbuf);
>  	if (nmbuf != 0)
> Index: sys/arch/octeon/dev/if_cnmacvar.h
> ===================================================================
> RCS file: /home/cvs/src/sys/arch/octeon/dev/if_cnmacvar.h,v
> diff -u -p -r1.20 if_cnmacvar.h
> --- sys/arch/octeon/dev/if_cnmacvar.h	28 Dec 2022 01:39:21 -0000	1.20
> +++ sys/arch/octeon/dev/if_cnmacvar.h	26 Apr 2026 18:36:04 -0000
> @@ -63,7 +63,6 @@ struct cnmac_softc {
>  
>  	bus_dmamap_t		sc_dmap;
>  
> -	void			*sc_ih;
>  	struct cn30xxpip_softc	*sc_pip;
>  	struct cn30xxipd_softc	*sc_ipd;
>  	struct cn30xxpko_softc	*sc_pko;
> @@ -92,7 +91,6 @@ struct cnmac_softc {
>  	uint32_t		sc_port_type;
>  	uint32_t		sc_init_flag;
>  	int			sc_phy_addr;
> -	int			sc_powgroup;
>  
>  	/*
>  	 * Redirection - received (input) packets are redirected (directly sent)
> 
> 
> -- 
> wbr, Kirill
>