From: Stefan Fritsch Subject: Re: Mostly-Inverted CPU-Core ordering To: Greg Schaefer Cc: "tech@openbsd.org" , visa@openbsd.org Date: Tue, 13 Jan 2026 18:34:39 +0100 visa@ Theo suggested to cc you because of mips64 On Tue, 13 Jan 2026, Stefan Fritsch wrote: > On Mon, 12 Jan 2026, Greg Schaefer wrote: > > > While debugging another issue, I noticed that amd64/cpu_attach (and others > > it appears) reorder the ACPI x2APIC CPU list. In a hybrid world, think that > > Intel recommends x2APIC ordering by "CPU power" (P-cores first, E-cores next, > > LP-cores last). cpu_info_list contains a static primary (boot-CPU) record > > and a linked-list to the remaining application processors. The boot core is > > copied to the static allocation, but the remaining cores are head-inserted > > (reversed) into the linked-list. > > > > case CPU_ROLE_AP: > > #if defined(MULTIPROCESSOR) > > cpu_intr_init(ci); > > cpu_start_secondary(ci); > > clockqueue_init(&ci->ci_queue); > > sched_init_cpu(ci); > > ncpus++; > > if (ci->ci_flags & CPUF_PRESENT) { > > ** ci->ci_next = cpu_info_list->ci_next; > > ** cpu_info_list->ci_next = ci; > > > > Using the example of a hybrid process with 2 P-cores, 2 E-cores and 2 LP-cores, > > the resulting ordering is P-Core 0, LP-Core 1, LP-Core 0, E-Core 1, E-core 0, > > P-core 0. When NICs setup their interrupt queues (often power-of-two), via > > intrmap_create(), they are striped across the cores in that mostly-reversed > > order. > > > > Unclear what, if any, performance impact this has, but it can make debugging > > interesting. The simple fix is appending to cpu_info_list instead of reverse > > head-inserting. Please ignore if a known/non-issue. > > I can confirm your analysis. I don't have access to a hybrid CPU system > at the moment, but I have Linux cpuinfo and acpi table infos from two > Dell laptops with Core i7-1270P and Core i7-1370P and they seem to have > the P-cores before the E-cores in the MADT. Also, if I enable INTRDEBUG > in amd64/intr.c, I see that CPU_INFO_FOREACH traverses the CPUs in the > order 0,5,4,3,2,1 on a 6 VCPU VM and intrmap_create() uses > CPU_INFO_FOREACH. > > I also agree that one probably wants to have multi-queue NICs use the > CPUs with the highest performance and re-ordering the cpu_info_list > seems to have that effect at least on intel. The amd64 diff below puts > them in the correct order and seems to work. But the same code seems to > be in most (all?) other MP architectures. > > I don't know what effect the different ordering could have on non-hybrid > systems. On multi-socket systems, the interrupt distribution over the > sockets may change, but I don't see a problem with that. The order in > which IPIs are send will also change, and the scheduler will likely also > do things slightly differently, but I have no idea if that would have a > measurable impact. Anything else? Updated diff below that should fix all architectures. sparc64 already has the correct ordering. Boot tested on amd64, i386, arm64. diff --git a/sys/arch/alpha/alpha/cpu.c b/sys/arch/alpha/alpha/cpu.c index 23b25c94219..38425f1d905 100644 --- a/sys/arch/alpha/alpha/cpu.c +++ b/sys/arch/alpha/alpha/cpu.c @@ -419,7 +419,7 @@ cpu_announce_extensions(struct cpu_info *ci) void cpu_boot_secondary_processors(void) { - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; u_long i; for (i = 0; i < ALPHA_MAXPROCS; i++) { @@ -434,8 +434,10 @@ cpu_boot_secondary_processors(void) /* * Link the processor into the list, and launch it. */ - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; atomic_setbits_ulong(&ci->ci_flags, CPUF_RUNNING); atomic_setbits_ulong(&cpus_running, (1UL << i)); ncpus++; diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c index 18cfb8d6377..b2e59ae800f 100644 --- a/sys/arch/amd64/amd64/cpu.c +++ b/sys/arch/amd64/amd64/cpu.c @@ -583,7 +583,7 @@ cpu_attach(struct device *parent, struct device *self, void *aux) { struct cpu_softc *sc = (void *) self; struct cpu_attach_args *caa = aux; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; #if defined(MULTIPROCESSOR) int cpunum = sc->sc_dev.dv_unit; vaddr_t kstack; @@ -729,8 +729,10 @@ cpu_attach(struct device *parent, struct device *self, void *aux) sched_init_cpu(ci); ncpus++; if (ci->ci_flags & CPUF_PRESENT) { - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; } #else printf("%s: not started\n", sc->sc_dev.dv_xname); diff --git a/sys/arch/arm/arm/cpu.c b/sys/arch/arm/arm/cpu.c index f51404f73be..0ce7f92fbf2 100644 --- a/sys/arch/arm/arm/cpu.c +++ b/sys/arch/arm/arm/cpu.c @@ -342,7 +342,7 @@ void cpu_attach(struct device *parent, struct device *dev, void *aux) { struct fdt_attach_args *faa = aux; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; uint32_t mpidr; uint32_t opp; @@ -356,8 +356,10 @@ cpu_attach(struct device *parent, struct device *dev, void *aux) } else { ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK | M_ZERO); cpu_info[dev->dv_unit] = ci; - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; ci->ci_flags |= CPUF_AP; ncpus++; } diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c index f9784e432db..5dabc020a9c 100644 --- a/sys/arch/arm64/arm64/cpu.c +++ b/sys/arch/arm64/arm64/cpu.c @@ -1478,7 +1478,7 @@ void cpu_attach(struct device *parent, struct device *dev, void *aux) { struct fdt_attach_args *faa = aux; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; void *kstack; #ifdef MULTIPROCESSOR uint64_t mpidr = READ_SPECIALREG(mpidr_el1); @@ -1494,8 +1494,10 @@ cpu_attach(struct device *parent, struct device *dev, void *aux) } else { ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK | M_ZERO); cpu_info[dev->dv_unit] = ci; - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; ci->ci_flags |= CPUF_AP; ncpus++; } diff --git a/sys/arch/i386/i386/cpu.c b/sys/arch/i386/i386/cpu.c index c5f18dd4433..1de18768a37 100644 --- a/sys/arch/i386/i386/cpu.c +++ b/sys/arch/i386/i386/cpu.c @@ -231,7 +231,7 @@ cpu_attach(struct device *parent, struct device *self, void *aux) { struct cpu_softc *sc = (void *)self; struct cpu_attach_args *caa = (struct cpu_attach_args *)aux; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; #ifdef MULTIPROCESSOR int cpunum = sc->sc_dev.dv_unit; @@ -365,8 +365,10 @@ cpu_attach(struct device *parent, struct device *self, void *aux) identifycpu(ci); clockqueue_init(&ci->ci_queue); sched_init_cpu(ci); - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; ncpus++; #endif break; diff --git a/sys/arch/mips64/mips64/cpu.c b/sys/arch/mips64/mips64/cpu.c index 040669d397b..309f671b117 100644 --- a/sys/arch/mips64/mips64/cpu.c +++ b/sys/arch/mips64/mips64/cpu.c @@ -78,7 +78,7 @@ cpuattach(struct device *parent, struct device *dev, void *aux) { struct cpu_attach_args *caa = aux; struct cpu_hwinfo *ch = caa->caa_hw; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; int cpuno = dev->dv_unit; int fptype, vers_maj, vers_min; int displayver; @@ -96,8 +96,10 @@ cpuattach(struct device *parent, struct device *dev, void *aux) } } else { ci = &cpu_info_secondaries[cpuno - 1]; - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; ci->ci_flags |= CPUF_PRESENT; } #else diff --git a/sys/arch/riscv64/riscv64/cpu.c b/sys/arch/riscv64/riscv64/cpu.c index e8400d4eed3..fe75ab407cd 100644 --- a/sys/arch/riscv64/riscv64/cpu.c +++ b/sys/arch/riscv64/riscv64/cpu.c @@ -183,7 +183,7 @@ void cpu_attach(struct device *parent, struct device *dev, void *aux) { struct fdt_attach_args *faa = aux; - struct cpu_info *ci; + struct cpu_info *ci, *ci_last; int node, level; uint32_t opp; @@ -197,8 +197,10 @@ cpu_attach(struct device *parent, struct device *dev, void *aux) } else { ci = malloc(sizeof(*ci), M_DEVBUF, M_WAITOK | M_ZERO); cpu_info[dev->dv_unit] = ci; - ci->ci_next = cpu_info_list->ci_next; - cpu_info_list->ci_next = ci; + ci_last = cpu_info_list; + while (ci_last->ci_next != NULL) + ci_last = ci_last->ci_next; + ci_last->ci_next = ci; ci->ci_flags |= CPUF_AP; ncpus++; }