From: Vitaliy Makkoveev Subject: Re: sysctl: make amd64 HW_CPUSPEED mp-safe To: tedu@openbsd.org, tech@openbsd.org Date: Wed, 21 May 2025 13:39:42 +0000 On Wed, May 21, 2025 at 01:18:08PM +0200, Claudio Jeker wrote: > On Wed, May 21, 2025 at 12:14:31PM +0200, Kirill A. Korinsky wrote: > > On Tue, 20 May 2025 23:49:23 +0200, > > Vitaliy Makkoveev wrote: > > > > > > Since we are here and at least I do hw.cpuspeed measurements, I like to > > > make it mp-safe. Introduce temporary `cpu_cpuspeed_mpsafe' handler > > > pointer and call it lockless if it is set. Kernel lock is pretty enough > > > to call old `cpu_cpuspeed' handler, so we avoid `sysctl_lock' > > > acquisition and useless pages wiring. The rest architectures can be > > > adjusted separately. > > > > > > ok? > > > > > > > Make sense and tested on amd64. > > > > FWIW ok kirill@ > > Is there a way that does not introduce yet another API? > This feels like one of those changes where cpu_cpuspeed will remain > forever and cpu_cpuspeed_mpsafe will see no additional usage. > The only way is to push kernel lock down to `cpu_cpuspeed' handlers. > > > Index: sys/arch/amd64/amd64/identcpu.c > > > =================================================================== > > > RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v > > > retrieving revision 1.150 > > > diff -u -p -r1.150 identcpu.c > > > --- sys/arch/amd64/amd64/identcpu.c 29 Apr 2025 20:19:48 -0000 1.150 > > > +++ sys/arch/amd64/amd64/identcpu.c 20 May 2025 21:30:47 -0000 > > > @@ -61,9 +61,14 @@ void tsc_timecounter_init(struct cpu_inf > > > void cpu_check_vmm_cap(struct cpu_info *); > > > #endif /* NVMM > 0 */ > > > > > > +/* > > > + * Locks used to protect data: > > > + * a atomic > > > + */ > > > + > > > /* sysctl wants this. */ > > > char cpu_model[48]; > > > -int cpuspeed; > > > +int cpuspeed; /* [a] */ > > > > > > int amd64_has_xcrypt; > > > int amd64_pos_cbit; /* C bit position for SEV */ > > > @@ -74,7 +79,7 @@ int has_rdseed; > > > int > > > cpu_amd64speed(int *freq) > > > { > > > - *freq = cpuspeed; > > > + *freq = atomic_load_int(&cpuspeed); > > > return (0); > > > } > > > > > > @@ -620,7 +625,7 @@ identifycpu(struct cpu_info *ci) > > > > > > if (CPU_IS_PRIMARY(ci)) { > > > cpuspeed = (freq + 4999) / 1000000; > > > - cpu_cpuspeed = cpu_amd64speed; > > > + cpu_cpuspeed_mpsafe = cpu_amd64speed; > > > } > > > > > > printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model, > > > Index: sys/arch/amd64/amd64/k1x-pstate.c > > > =================================================================== > > > RCS file: /cvs/src/sys/arch/amd64/amd64/k1x-pstate.c,v > > > retrieving revision 1.11 > > > diff -u -p -r1.11 k1x-pstate.c > > > --- sys/arch/amd64/amd64/k1x-pstate.c 11 Aug 2021 18:31:48 -0000 1.11 > > > +++ sys/arch/amd64/amd64/k1x-pstate.c 20 May 2025 21:30:48 -0000 > > > @@ -26,6 +26,7 @@ > > > > > > #include > > > #include > > > +#include > > > #include > > > #include > > > > > > @@ -106,7 +107,7 @@ k1x_transition(struct k1x_cpu_state *cst > > > DELAY(100); > > > } > > > if (cfid == fid) { > > > - cpuspeed = cstate->state_table[level].freq; > > > + atomic_store_int(&cpuspeed, cstate->state_table[level].freq); > > > #if 0 > > > (void)printf("Target: %d Current: %d Pstate: %d\n", > > > cstate->state_table[level].freq, > > > Index: sys/arch/amd64/amd64/powernow-k8.c > > > =================================================================== > > > RCS file: /cvs/src/sys/arch/amd64/amd64/powernow-k8.c,v > > > retrieving revision 1.29 > > > diff -u -p -r1.29 powernow-k8.c > > > --- sys/arch/amd64/amd64/powernow-k8.c 21 Feb 2022 08:16:08 -0000 1.29 > > > +++ sys/arch/amd64/amd64/powernow-k8.c 20 May 2025 21:30:48 -0000 > > > @@ -28,6 +28,7 @@ > > > > > > #include > > > #include > > > +#include > > > #include > > > #include > > > > > > @@ -271,7 +272,7 @@ k8pnow_transition(struct k8pnow_cpu_stat > > > } > > > > > > if (cfid == fid || cvid == vid) > > > - cpuspeed = cstate->state_table[level].freq; > > > + atomic_store_int(&cpuspeed, cstate->state_table[level].freq); > > > } > > > > > > /* > > > Index: sys/kern/kern_sysctl.c > > > =================================================================== > > > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v > > > retrieving revision 1.468 > > > diff -u -p -r1.468 kern_sysctl.c > > > --- sys/kern/kern_sysctl.c 9 May 2025 14:53:22 -0000 1.468 > > > +++ sys/kern/kern_sysctl.c 20 May 2025 21:30:49 -0000 > > > @@ -171,6 +171,7 @@ int hw_sysctl_locked(int *, u_int, void > > > struct proc *); > > > > > > int (*cpu_cpuspeed)(int *); > > > +int (*cpu_cpuspeed_mpsafe)(int *); > > > > > > /* > > > * Lock to avoid too many processes vslocking a large amount of memory > > > @@ -844,9 +845,24 @@ hw_sysctl(int *name, u_int namelen, void > > > case HW_USERMEM: > > > return (sysctl_rdint(oldp, oldlenp, newp, > > > ptoa(physmem - uvmexp.wired))); > > > + case HW_CPUSPEED: { > > > + int cpuspeed; > > > + > > > + if (cpu_cpuspeed_mpsafe) > > > + err = cpu_cpuspeed_mpsafe(&cpuspeed); > > > + else if (cpu_cpuspeed) { > > > + KERNEL_LOCK(); > > > + err = cpu_cpuspeed(&cpuspeed); > > > + KERNEL_UNLOCK(); > > > + } else > > > + err = EOPNOTSUPP; > > > + > > > + if (err) > > > + return (err); > > > + return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed)); > > > + } > > > case HW_DISKNAMES: > > > case HW_DISKSTATS: > > > - case HW_CPUSPEED: > > > #ifndef SMALL_KERNEL > > > case HW_SENSORS: > > > case HW_SETPERF: > > > @@ -911,7 +927,7 @@ int > > > hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp, > > > void *newp, size_t newlen, struct proc *p) > > > { > > > - int err, cpuspeed; > > > + int err; > > > > > > switch (name[0]) { > > > case HW_DISKNAMES: > > > @@ -929,13 +945,6 @@ hw_sysctl_locked(int *name, u_int namele > > > return err; > > > return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats, > > > disk_count * sizeof(struct diskstats))); > > > - case HW_CPUSPEED: > > > - if (!cpu_cpuspeed) > > > - return (EOPNOTSUPP); > > > - err = cpu_cpuspeed(&cpuspeed); > > > - if (err) > > > - return err; > > > - return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed)); > > > #ifndef SMALL_KERNEL > > > case HW_SENSORS: > > > return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp, > > > Index: sys/sys/sysctl.h > > > =================================================================== > > > RCS file: /cvs/src/sys/sys/sysctl.h,v > > > retrieving revision 1.242 > > > diff -u -p -r1.242 sysctl.h > > > --- sys/sys/sysctl.h 29 Apr 2025 02:24:32 -0000 1.242 > > > +++ sys/sys/sysctl.h 20 May 2025 21:30:49 -0000 > > > @@ -1084,6 +1084,7 @@ int sysctl_sysvipc(int *, u_int, void *, > > > int sysctl_wdog(int *, u_int, void *, size_t *, void *, size_t); > > > > > > extern int (*cpu_cpuspeed)(int *); > > > +extern int (*cpu_cpuspeed_mpsafe)(int *); > > > extern void (*cpu_setperf)(int); > > > > > > int net_ifiq_sysctl(int *, u_int, void *, size_t *, void *, size_t); > > > > -- > > wbr, Kirill > > > > -- > :wq Claudio