From: Visa Hankala Subject: Re: octeon: commuliative patch LRO, cnmac queue and softens To: "Kirill A. Korinsky" Cc: tech@openbsd.org Date: Fri, 3 Apr 2026 14:28:34 +0000 On Wed, Apr 01, 2026 at 06:32:26PM +0200, Kirill A. Korinsky wrote: > tech@, > > here a cumulative patch which I'm working to improve cnmac on octeon. > > It includes: > - LRO: https://marc.info/?l=openbsd-tech&m=177497356705869&w=2 > - Softnet: https://marc.info/?l=openbsd-bugs&m=177506284620902&w=2 > > Plus new work where I introduced support of RX queue and moved away from > hardcoded POW groups which maps each port to dedicated CPU. > > All together it allows me to have with single iperf stream: > > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-30.00 sec 2.36 GBytes 677 Mbits/sec 616152 sender > [ 5] 0.00-30.00 sec 2.36 GBytes 676 Mbits/sec receiver > > with -P 2: > > [ ID] Interval Transfer Bitrate Retr > [ 5] 0.00-30.00 sec 1.72 GBytes 492 Mbits/sec 451776 sender > [ 5] 0.00-30.01 sec 1.72 GBytes 491 Mbits/sec receiver > [ 7] 0.00-30.00 sec 1.38 GBytes 396 Mbits/sec 314216 sender > [ 7] 0.00-30.01 sec 1.38 GBytes 396 Mbits/sec receiver > [SUM] 0.00-30.00 sec 3.10 GBytes 888 Mbits/sec 765992 sender > [SUM] 0.00-30.01 sec 3.10 GBytes 887 Mbits/sec receiver > > on test stand where machines on different vlan but on the same cnamc with > enabled pf. > > Feedback? Objection? OK? I will take a closer look in the near future. 
Some quick comments below: > > The diff: > > Index: sys/arch/mips64/mips64/cpu.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/mips64/mips64/cpu.c,v > diff -u -p -r1.85 cpu.c > --- sys/arch/mips64/mips64/cpu.c 5 Jun 2025 09:29:54 -0000 1.85 > +++ sys/arch/mips64/mips64/cpu.c 1 Apr 2026 16:16:36 -0000 > @@ -99,6 +99,7 @@ cpuattach(struct device *parent, struct > ci->ci_next = cpu_info_list->ci_next; > cpu_info_list->ci_next = ci; > ci->ci_flags |= CPUF_PRESENT; > + ncpus++; > } > #else > ci = &cpu_info_primary; > Index: sys/arch/octeon/dev/cn30xxpip.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/dev/cn30xxpip.c,v > diff -u -p -r1.11 cn30xxpip.c > --- sys/arch/octeon/dev/cn30xxpip.c 28 Dec 2022 01:39:21 -0000 1.11 > +++ sys/arch/octeon/dev/cn30xxpip.c 1 Apr 2026 16:16:23 -0000 > @@ -57,6 +57,7 @@ cn30xxpip_init(struct cn30xxpip_attach_a > sc->sc_regt = aa->aa_regt; > sc->sc_tag_type = aa->aa_tag_type; > sc->sc_receive_group = aa->aa_receive_group; > + sc->sc_receive_group_order = aa->aa_receive_group_order; > sc->sc_ip_offset = aa->aa_ip_offset; > > status = bus_space_map(sc->sc_regt, PIP_BASE, PIP_SIZE, 0, > @@ -88,6 +89,7 @@ cn30xxpip_port_config(struct cn30xxpip_s > uint64_t prt_cfg; > uint64_t prt_tag; > uint64_t ip_offset; > + uint64_t group_mask; > > /* > * Process the headers and place the IP header in the work queue > @@ -108,22 +110,30 @@ cn30xxpip_port_config(struct cn30xxpip_s > /* SKIP=0 */ > > prt_tag = 0; > + SET(prt_tag, PIP_PRT_TAGN_INC_VLAN); > SET(prt_tag, PIP_PRT_TAGN_INC_PRT); I wonder if VLAN id and input port number should be left out from the packet tag. This would make the tag symmetric with regards to IP addresses and TCP/UDP ports, and let the same CPU core handle both directions of TCP/UDP flows. This might improve CPU cache locality and performance when forwarding multiple flows. 
Of course, the symmetry is lost if packets are transformed, for example, by NAT or tunneling. > - CLR(prt_tag, PIP_PRT_TAGN_IP6_DPRT); > - CLR(prt_tag, PIP_PRT_TAGN_IP4_DPRT); > - CLR(prt_tag, PIP_PRT_TAGN_IP6_SPRT); > - CLR(prt_tag, PIP_PRT_TAGN_IP4_SPRT); > + SET(prt_tag, PIP_PRT_TAGN_IP6_DPRT); > + SET(prt_tag, PIP_PRT_TAGN_IP4_DPRT); > + SET(prt_tag, PIP_PRT_TAGN_IP6_SPRT); > + SET(prt_tag, PIP_PRT_TAGN_IP4_SPRT); > CLR(prt_tag, PIP_PRT_TAGN_IP6_NXTH); > CLR(prt_tag, PIP_PRT_TAGN_IP4_PCTL); > - CLR(prt_tag, PIP_PRT_TAGN_IP6_DST); > - CLR(prt_tag, PIP_PRT_TAGN_IP4_SRC); > - CLR(prt_tag, PIP_PRT_TAGN_IP6_SRC); > - CLR(prt_tag, PIP_PRT_TAGN_IP4_DST); > + SET(prt_tag, PIP_PRT_TAGN_IP6_DST); > + SET(prt_tag, PIP_PRT_TAGN_IP4_SRC); > + SET(prt_tag, PIP_PRT_TAGN_IP6_SRC); > + SET(prt_tag, PIP_PRT_TAGN_IP4_DST); > SET(prt_tag, PIP_PRT_TAGN_TCP6_TAG_ORDERED); > SET(prt_tag, PIP_PRT_TAGN_TCP4_TAG_ORDERED); > SET(prt_tag, PIP_PRT_TAGN_IP6_TAG_ORDERED); > SET(prt_tag, PIP_PRT_TAGN_IP4_TAG_ORDERED); > SET(prt_tag, PIP_PRT_TAGN_NON_TAG_ORDERED); > + if (sc->sc_receive_group_order > 0) { > + group_mask = ~((1U << sc->sc_receive_group_order) - 1U); > + SET(prt_tag, ((uint64_t)sc->sc_receive_group << 36) & > + PIP_PRT_TAGN_GRPTAGBASE); > + SET(prt_tag, (group_mask << 32) & PIP_PRT_TAGN_GRPTAGMASK); > + SET(prt_tag, PIP_PRT_TAGN_GRPTAG); > + } > SET(prt_tag, sc->sc_receive_group & PIP_PRT_TAGN_GRP); > > ip_offset = 0; > Index: sys/arch/octeon/dev/cn30xxpipvar.h > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/dev/cn30xxpipvar.h,v > diff -u -p -r1.6 cn30xxpipvar.h > --- sys/arch/octeon/dev/cn30xxpipvar.h 20 May 2024 23:13:33 -0000 1.6 > +++ sys/arch/octeon/dev/cn30xxpipvar.h 1 Apr 2026 16:16:23 -0000 > @@ -41,6 +41,7 @@ struct cn30xxpip_softc { > bus_space_handle_t sc_regh_stat; > int sc_tag_type; > int sc_receive_group; > + int sc_receive_group_order; > size_t sc_ip_offset; > }; > > @@ -50,6 +51,7 @@ struct 
cn30xxpip_attach_args { > bus_space_tag_t aa_regt; > int aa_tag_type; > int aa_receive_group; > + int aa_receive_group_order; > size_t aa_ip_offset; > }; > > Index: sys/arch/octeon/dev/if_cnmac.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/dev/if_cnmac.c,v > diff -u -p -r1.86 if_cnmac.c > --- sys/arch/octeon/dev/if_cnmac.c 20 May 2024 23:13:33 -0000 1.86 > +++ sys/arch/octeon/dev/if_cnmac.c 1 Apr 2026 16:16:23 -0000 > @@ -55,6 +55,11 @@ > #include > #include > #include > +#ifndef SMALL_KERNEL > +#include > +#include > +#include > +#endif > > #if NBPFILTER > 0 > #include > @@ -154,6 +159,11 @@ int cnmac_send(struct cnmac_softc *, str > int cnmac_reset(struct cnmac_softc *); > int cnmac_configure(struct cnmac_softc *); > int cnmac_configure_common(struct cnmac_softc *); > +unsigned int cnmac_rx_group_count(void); > +unsigned int cnmac_rx_group_order(unsigned int); > +void cnmac_rx_groups_init(void); > +void cnmac_rx_groups_config(struct cn30xxpow_softc *); > +void cnmac_rx_groups_barrier(void); > > void cnmac_free_task(void *); > void cnmac_tick_free(void *arg); > @@ -182,6 +192,15 @@ const struct cfattach cnmac_ca = { > > struct cfdriver cnmac_cd = { NULL, "cnmac", DV_IFNET }; > > +#define CNMAC_PIP_PORT_MAX 64 > + > +struct cnmac_rx_group { > + unsigned int crg_group; > + void *crg_ih; > + char crg_name[IFNAMSIZ]; > + struct mbuf_list crg_rx_batch[CNMAC_PIP_PORT_MAX]; > +}; > + > /* ---- buffer management */ > > const struct cnmac_pool_param { > @@ -204,7 +223,10 @@ uint64_t cnmac_mac_addr = 0; > uint32_t cnmac_mac_addr_offset = 0; > > int cnmac_mbufs_to_alloc; > -int cnmac_npowgroups = 0; > +unsigned int cnmac_nrxgroups = 0; > +unsigned int cnmac_nrxgroups_order = 0; > +struct cnmac_softc *cnmac_port_softc[CNMAC_PIP_PORT_MAX]; > +struct cnmac_rx_group cnmac_rx_groups[OCTEON_POW_GROUP_MAX]; > > void > cnmac_buf_init(struct cnmac_softc *sc) > @@ -225,6 +247,72 @@ cnmac_buf_init(struct cnmac_softc 
*sc) > } > } > > +unsigned int > +cnmac_rx_group_count(void) > +{ > + unsigned int count = 1; > + unsigned int target = softnet_count(); > + > + while (count < target && count < OCTEON_POW_GROUP_MAX) > + count <<= 1; > + > + return count; > +} > + > +unsigned int > +cnmac_rx_group_order(unsigned int count) > +{ > + unsigned int order = 0; > + > + while ((1U << order) < count) > + order++; > + > + return order; > +} > + > +void > +cnmac_rx_groups_init(void) > +{ > + struct cnmac_rx_group *crg; > + unsigned int i; > + > + if (cnmac_nrxgroups != 0) > + return; > + > + cnmac_nrxgroups = cnmac_rx_group_count(); > + cnmac_nrxgroups_order = cnmac_rx_group_order(cnmac_nrxgroups); > + > + for (i = 0; i < cnmac_nrxgroups; i++) { > + crg = &cnmac_rx_groups[i]; > + crg->crg_group = i; > + snprintf(crg->crg_name, sizeof(crg->crg_name), > + "cnmacrx%u", i); > + crg->crg_ih = octeon_intr_establish(POW_WORKQ_IRQ(i), > + IPL_NET | IPL_MPSAFE, cnmac_intr, crg, crg->crg_name); > + if (crg->crg_ih == NULL) > + panic("%s: could not set up interrupt", > + crg->crg_name); > + } > +} > + > +void > +cnmac_rx_groups_config(struct cn30xxpow_softc *pow) > +{ > + unsigned int i; > + > + for (i = 0; i < cnmac_nrxgroups; i++) > + cn30xxpow_config(pow, i); > +} > + > +void > +cnmac_rx_groups_barrier(void) > +{ > + unsigned int i; > + > + for (i = 0; i < cnmac_nrxgroups; i++) > + intr_barrier(cnmac_rx_groups[i].crg_ih); > +} > + > /* ---- autoconf */ > > int > @@ -246,11 +334,6 @@ cnmac_attach(struct device *parent, stru > struct cn30xxgmx_attach_args *ga = aux; > struct ifnet *ifp = &sc->sc_arpcom.ac_if; > > - if (cnmac_npowgroups >= OCTEON_POW_GROUP_MAX) { > - printf(": out of POW groups\n"); > - return; > - } > - > atomic_add_int(&cnmac_mbufs_to_alloc, > cnmac_mbuf_alloc(CNMAC_MBUFS_PER_PORT)); > > @@ -262,7 +345,6 @@ cnmac_attach(struct device *parent, stru > sc->sc_gmx_port = ga->ga_gmx_port; > sc->sc_smi = ga->ga_smi; > sc->sc_phy_addr = ga->ga_phy_addr; > - sc->sc_powgroup = 
cnmac_npowgroups++; > > sc->sc_init_flag = 0; > > @@ -282,6 +364,10 @@ cnmac_attach(struct device *parent, stru > task_set(&sc->sc_free_task, cnmac_free_task, sc); > timeout_set(&sc->sc_tick_misc_ch, cnmac_tick_misc, sc); > timeout_set(&sc->sc_tick_free_ch, cnmac_tick_free, sc); > + cnmac_rx_groups_init(); > + KASSERT(sc->sc_port < nitems(cnmac_port_softc)); > + KASSERT(cnmac_port_softc[sc->sc_port] == NULL); > + cnmac_port_softc[sc->sc_port] = sc; > > cn30xxfau_op_init(&sc->sc_fau_done, > OCTEON_CVMSEG_ETHER_OFFSET(sc->sc_dev.dv_unit, csm_ether_fau_done), > @@ -307,6 +393,9 @@ cnmac_attach(struct device *parent, stru > ifp->if_softc = sc; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > ifp->if_xflags = IFXF_MPSAFE; > +#ifndef SMALL_KERNEL > + ifp->if_xflags |= IFXF_LRO; > +#endif > ifp->if_ioctl = cnmac_ioctl; > ifp->if_qstart = cnmac_start; > ifp->if_watchdog = cnmac_watchdog; > @@ -315,22 +404,21 @@ cnmac_attach(struct device *parent, stru > > ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_TCPv4 | > IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > +#ifndef SMALL_KERNEL > + ifp->if_capabilities |= IFCAP_LRO; > +#endif > > cn30xxgmx_set_filter(sc->sc_gmx_port); > > if_attach(ifp); > ether_ifattach(ifp); > + if_attach_iqueues(ifp, cnmac_nrxgroups); > > cnmac_buf_init(sc); > > #if NKSTAT > 0 > cnmac_kstat_attach(sc); > #endif > - > - sc->sc_ih = octeon_intr_establish(POW_WORKQ_IRQ(sc->sc_powgroup), > - IPL_NET | IPL_MPSAFE, cnmac_intr, sc, sc->sc_dev.dv_xname); > - if (sc->sc_ih == NULL) > - panic("%s: could not set up interrupt", sc->sc_dev.dv_xname); > } > > /* ---- submodules */ > @@ -343,7 +431,8 @@ cnmac_pip_init(struct cnmac_softc *sc) > pip_aa.aa_port = sc->sc_port; > pip_aa.aa_regt = sc->sc_regt; > pip_aa.aa_tag_type = POW_TAG_TYPE_ORDERED/* XXX */; > - pip_aa.aa_receive_group = sc->sc_powgroup; > + pip_aa.aa_receive_group = 0; > + pip_aa.aa_receive_group_order = cnmac_nrxgroups_order; > pip_aa.aa_ip_offset = sc->sc_ip_offset; > 
cn30xxpip_init(&pip_aa, &sc->sc_pip); > cn30xxpip_port_config(sc->sc_pip); > @@ -1026,7 +1115,7 @@ cnmac_stop(struct ifnet *ifp, int disabl > > cn30xxgmx_port_enable(sc->sc_gmx_port, 0); > > - intr_barrier(sc->sc_ih); > + cnmac_rx_groups_barrier(); > ifq_barrier(&ifp->if_snd); > > ifq_clr_oactive(&ifp->if_snd); > @@ -1058,7 +1147,7 @@ cnmac_configure(struct cnmac_softc *sc) > > cn30xxpko_port_config(sc->sc_pko); > cn30xxpko_port_enable(sc->sc_pko, 1); > - cn30xxpow_config(sc->sc_pow, sc->sc_powgroup); > + cnmac_rx_groups_config(sc->sc_pow); > > cn30xxgmx_port_enable(sc->sc_gmx_port, 1); > > @@ -1212,9 +1301,13 @@ cnmac_recv(struct cnmac_softc *sc, uint6 > { > struct ifnet *ifp = &sc->sc_arpcom.ac_if; > struct mbuf *m; > - uint64_t word2; > + uint64_t word1, word2; > int nmbuf = 0; > +#ifndef SMALL_KERNEL > + struct ether_extracted ext; > +#endif > > + word1 = work[1]; > word2 = work[2]; > > if (!(ifp->if_flags & IFF_RUNNING)) > @@ -1232,6 +1325,8 @@ cnmac_recv(struct cnmac_softc *sc, uint6 > } > > m->m_pkthdr.csum_flags = 0; > + m->m_pkthdr.ph_flowid = word1 & PIP_WQE_WORD1_TAG; > + SET(m->m_pkthdr.csum_flags, M_FLOWID); > if (__predict_true(!ISSET(word2, PIP_WQE_WORD2_IP_NI))) { > /* Check IP checksum status. 
*/ > if (!ISSET(word2, PIP_WQE_WORD2_IP_V6) && > @@ -1246,7 +1341,19 @@ cnmac_recv(struct cnmac_softc *sc, uint6 > M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK; > } > > - ml_enqueue(ml, m); > +#ifndef SMALL_KERNEL > + if (__predict_true(ISSET(ifp->if_xflags, IFXF_LRO)) && > + __predict_true(!ISSET(word2, PIP_WQE_WORD2_IP_NI)) && > + ISSET(word2, PIP_WQE_WORD2_IP_TU) && > + !ISSET(word2, PIP_WQE_WORD2_IP_FR | PIP_WQE_WORD2_IP_LE)) { > + ether_extract_headers(m, &ext); > + if (ext.tcp != NULL) > + tcp_softlro_glue(ml, m, ifp); > + else > + ml_enqueue(ml, m); > + } else > +#endif > + ml_enqueue(ml, m); > > return nmbuf; > > @@ -1258,16 +1365,20 @@ drop: > int > cnmac_intr(void *arg) > { > - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); > - struct cnmac_softc *sc = arg; > - struct ifnet *ifp = &sc->sc_arpcom.ac_if; > + struct cnmac_rx_group *crg = arg; > + struct cn30xxpow_softc *pow = &cn30xxpow_softc; > + struct cnmac_softc *sc; > + struct ifnet *ifp; > + struct mbuf_list *ml; > uint64_t *work; > - uint64_t wqmask = 1ull << sc->sc_powgroup; > + uint64_t pending = 0; > + uint64_t wqmask = 1ull << crg->crg_group; > uint32_t coreid = octeon_get_coreid(); > - uint32_t port; > + unsigned int port; > + unsigned int i; > int nmbuf = 0; > > - _POW_WR8(sc->sc_pow, POW_PP_GRP_MSK_OFFSET(coreid), wqmask); > + _POW_WR8(pow, POW_PP_GRP_MSK_OFFSET(coreid), wqmask); > > cn30xxpow_tag_sw_wait(); > cn30xxpow_work_request_async(OCTEON_CVMSEG_OFFSET(csm_pow_intr), > @@ -1284,18 +1395,30 @@ cnmac_intr(void *arg) > OCTEON_CVMSEG_OFFSET(csm_pow_intr), POW_NO_WAIT); > > port = (work[1] & PIP_WQE_WORD1_IPRT) >> 42; > - if (port != sc->sc_port) { > - printf("%s: unexpected wqe port %u, should be %u\n", > - sc->sc_dev.dv_xname, port, sc->sc_port); > + if (port >= nitems(cnmac_port_softc) || > + (sc = cnmac_port_softc[port]) == NULL) { > + printf("%s: unexpected wqe port %u\n", > + crg->crg_name, port); > goto wqe_error; > } > > - nmbuf += cnmac_recv(sc, work, &ml); > + if ((pending & (1ULL << port)) 
== 0) { > + ml_init(&crg->crg_rx_batch[port]); > + pending |= 1ULL << port; > + } > + nmbuf += cnmac_recv(sc, work, &crg->crg_rx_batch[port]); > } > > - _POW_WR8(sc->sc_pow, POW_WQ_INT_OFFSET, wqmask); > + _POW_WR8(pow, POW_WQ_INT_OFFSET, wqmask); > > - if_input(ifp, &ml); > + for (i = 0; i < nitems(cnmac_port_softc); i++) { > + if ((pending & (1ULL << i)) == 0) > + continue; > + sc = cnmac_port_softc[i]; > + ifp = &sc->sc_arpcom.ac_if; > + ml = &crg->crg_rx_batch[i]; > + ifiq_input(ifp->if_iqs[crg->crg_group], ml); > + } I think the pending entries could be found a bit faster like this: while (pending) { i = __builtin_ffsll(pending) - 1; ... pending &= ~(1ULL << i); } > > nmbuf = cnmac_mbuf_alloc(nmbuf); > if (nmbuf != 0) > Index: sys/arch/octeon/dev/if_cnmacvar.h > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/dev/if_cnmacvar.h,v > diff -u -p -r1.20 if_cnmacvar.h > --- sys/arch/octeon/dev/if_cnmacvar.h 28 Dec 2022 01:39:21 -0000 1.20 > +++ sys/arch/octeon/dev/if_cnmacvar.h 1 Apr 2026 16:16:23 -0000 > @@ -63,7 +63,6 @@ struct cnmac_softc { > > bus_dmamap_t sc_dmap; > > - void *sc_ih; > struct cn30xxpip_softc *sc_pip; > struct cn30xxipd_softc *sc_ipd; > struct cn30xxpko_softc *sc_pko; > @@ -92,7 +91,6 @@ struct cnmac_softc { > uint32_t sc_port_type; > uint32_t sc_init_flag; > int sc_phy_addr; > - int sc_powgroup; > > /* > * Redirection - received (input) packets are redirected (directly sent) > Index: sys/arch/octeon/dev/octciu.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/dev/octciu.c,v > diff -u -p -r1.19 octciu.c > --- sys/arch/octeon/dev/octciu.c 11 Dec 2022 05:31:05 -0000 1.19 > +++ sys/arch/octeon/dev/octciu.c 1 Apr 2026 16:16:36 -0000 > @@ -250,12 +250,6 @@ octciu_intr_establish(int irq, int level > panic("%s: illegal irq %d", __func__, irq); > #endif > > -#ifdef MULTIPROCESSOR > - /* Span work queue interrupts across CPUs. 
*/ > - if (IS_WORKQ_IRQ(irq)) > - cpuid = irq % ncpus; > -#endif > - > flags = (level & IPL_MPSAFE) ? CIH_MPSAFE : 0; > level &= ~IPL_MPSAFE; > > Index: sys/arch/octeon/octeon/machdep.c > =================================================================== > RCS file: /home/cvs/src/sys/arch/octeon/octeon/machdep.c,v > diff -u -p -r1.137 machdep.c > --- sys/arch/octeon/octeon/machdep.c 24 Oct 2023 13:20:10 -0000 1.137 > +++ sys/arch/octeon/octeon/machdep.c 1 Apr 2026 16:16:36 -0000 > @@ -802,7 +802,7 @@ static u_int64_t > get_ncpusfound(void) > { > uint64_t core_mask; > - uint64_t i, ncpus = 0; > + uint64_t i, n = 0; > int chipid; > > chipid = octeon_get_chipid(); > @@ -818,9 +818,9 @@ get_ncpusfound(void) > > /* There has to be 1-to-1 mapping between cpuids and coreids. */ > for (i = 0; i < OCTEON_MAXCPUS && (core_mask & (1ul << i)) != 0; i++) > - ncpus++; > + n++; > > - return ncpus; > + return n; > } > > static enum octeon_board > @@ -1333,8 +1333,6 @@ hw_cpu_hatch(struct cpu_info *ci) > > ci->ci_flags |= CPUF_RUNNING; > membar_sync(); > - > - ncpus++; > > spl0(); > (void)updateimask(0); >