Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: sysctl: make amd64 HW_CPUSPEED mp-safe
To:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Cc:
mvs@openbsd.org, tedu@openbsd.org, tech@openbsd.org
Date:
Wed, 21 May 2025 13:49:57 +0200

Download raw body.

Thread
> Date: Wed, 21 May 2025 13:18:08 +0200
> From: Claudio Jeker <cjeker@diehard.n-r-g.com>
> 
> On Wed, May 21, 2025 at 12:14:31PM +0200, Kirill A. Korinsky wrote:
> > On Tue, 20 May 2025 23:49:23 +0200,
> > Vitaliy Makkoveev <mvs@openbsd.org> wrote:
> > > 
> > > Since we are here and at least I do hw.cpuspeed measurements, I like to
> > > make it mp-safe. Introduce temporary `cpu_cpuspeed_mpsafe' handler
> > > pointer and call it lockless if it is set. Kernel lock is pretty enough
> > > to call old `cpu_cpuspeed' handler, so we avoid `sysctl_lock'
> > > acquisition and useless pages wiring. The rest architectures can be
> > > adjusted separately.
> > > 
> > > ok?
> > >
> > 
> > Make sense and tested on amd64.
> > 
> > FWIW ok kirill@
> 
> Is there a way that does not introduce yet another API?
> This feels like one of those changes where cpu_cpuspeed will remain
> forever and cpu_cpuspeed_mpsafe will see no additional usage.

Perhaps more importantly, hw.cpuspeed really isn't a good match for
modern systems anymore because:

1. Many modern laptops have CPUs that integrate cores of different
   types that run at different clock speeds (and have wildly different
   performance at the same clock speed).

2. Many modern systems dynamically adjust the clock speed of
   individual cores based on activity and thermals.

We need something different.  For some systems we now report the
actual CPU clock speed through hw.sensors.  For others we provide
information using kstat.  We probably should think about a more
consistent approach and deprecate hw.cpuspeed.  And at that point...

...why bother making it "mpsafe".


> > > Index: sys/arch/amd64/amd64/identcpu.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
> > > retrieving revision 1.150
> > > diff -u -p -r1.150 identcpu.c
> > > --- sys/arch/amd64/amd64/identcpu.c	29 Apr 2025 20:19:48 -0000	1.150
> > > +++ sys/arch/amd64/amd64/identcpu.c	20 May 2025 21:30:47 -0000
> > > @@ -61,9 +61,14 @@ void	tsc_timecounter_init(struct cpu_inf
> > >  void	cpu_check_vmm_cap(struct cpu_info *);
> > >  #endif /* NVMM > 0 */
> > >  
> > > +/*
> > > + * Locks used to protect data:
> > > + *	a	atomic
> > > + */
> > > +
> > >  /* sysctl wants this. */
> > >  char cpu_model[48];
> > > -int cpuspeed;
> > > +int cpuspeed;			/* [a] */
> > >  
> > >  int amd64_has_xcrypt;
> > >  int amd64_pos_cbit;	/* C bit position for SEV */
> > > @@ -74,7 +79,7 @@ int has_rdseed;
> > >  int
> > >  cpu_amd64speed(int *freq)
> > >  {
> > > -	*freq = cpuspeed;
> > > +	*freq = atomic_load_int(&cpuspeed);
> > >  	return (0);
> > >  }
> > >  
> > > @@ -620,7 +625,7 @@ identifycpu(struct cpu_info *ci)
> > >  
> > >  	if (CPU_IS_PRIMARY(ci)) {
> > >  		cpuspeed = (freq + 4999) / 1000000;
> > > -		cpu_cpuspeed = cpu_amd64speed;
> > > +		cpu_cpuspeed_mpsafe = cpu_amd64speed;
> > >  	}
> > >  
> > >  	printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
> > > Index: sys/arch/amd64/amd64/k1x-pstate.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/k1x-pstate.c,v
> > > retrieving revision 1.11
> > > diff -u -p -r1.11 k1x-pstate.c
> > > --- sys/arch/amd64/amd64/k1x-pstate.c	11 Aug 2021 18:31:48 -0000	1.11
> > > +++ sys/arch/amd64/amd64/k1x-pstate.c	20 May 2025 21:30:48 -0000
> > > @@ -26,6 +26,7 @@
> > >  
> > >  #include <sys/param.h>
> > >  #include <sys/systm.h>
> > > +#include <sys/atomic.h>
> > >  #include <sys/malloc.h>
> > >  #include <sys/sysctl.h>
> > >  
> > > @@ -106,7 +107,7 @@ k1x_transition(struct k1x_cpu_state *cst
> > >  		DELAY(100);
> > >  	}
> > >  	if (cfid == fid) {
> > > -		cpuspeed = cstate->state_table[level].freq;
> > > +		atomic_store_int(&cpuspeed, cstate->state_table[level].freq);
> > >  #if 0
> > >  		(void)printf("Target: %d Current: %d Pstate: %d\n",
> > >  		    cstate->state_table[level].freq,
> > > Index: sys/arch/amd64/amd64/powernow-k8.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/powernow-k8.c,v
> > > retrieving revision 1.29
> > > diff -u -p -r1.29 powernow-k8.c
> > > --- sys/arch/amd64/amd64/powernow-k8.c	21 Feb 2022 08:16:08 -0000	1.29
> > > +++ sys/arch/amd64/amd64/powernow-k8.c	20 May 2025 21:30:48 -0000
> > > @@ -28,6 +28,7 @@
> > >  
> > >  #include <sys/param.h>
> > >  #include <sys/systm.h>
> > > +#include <sys/atomic.h>
> > >  #include <sys/malloc.h>
> > >  #include <sys/sysctl.h>
> > >  
> > > @@ -271,7 +272,7 @@ k8pnow_transition(struct k8pnow_cpu_stat
> > >  	}
> > >  
> > >  	if (cfid == fid || cvid == vid)
> > > -		cpuspeed = cstate->state_table[level].freq;
> > > +		atomic_store_int(&cpuspeed, cstate->state_table[level].freq);
> > >  }
> > >  
> > >  /*
> > > Index: sys/kern/kern_sysctl.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> > > retrieving revision 1.468
> > > diff -u -p -r1.468 kern_sysctl.c
> > > --- sys/kern/kern_sysctl.c	9 May 2025 14:53:22 -0000	1.468
> > > +++ sys/kern/kern_sysctl.c	20 May 2025 21:30:49 -0000
> > > @@ -171,6 +171,7 @@ int hw_sysctl_locked(int *, u_int, void 
> > >  	struct proc *);
> > >  
> > >  int (*cpu_cpuspeed)(int *);
> > > +int (*cpu_cpuspeed_mpsafe)(int *);
> > >  
> > >  /*
> > >   * Lock to avoid too many processes vslocking a large amount of memory
> > > @@ -844,9 +845,24 @@ hw_sysctl(int *name, u_int namelen, void
> > >  	case HW_USERMEM:
> > >  		return (sysctl_rdint(oldp, oldlenp, newp,
> > >  		    ptoa(physmem - uvmexp.wired)));
> > > +	case HW_CPUSPEED: {
> > > +		int cpuspeed;
> > > +
> > > +		if (cpu_cpuspeed_mpsafe)
> > > +			err = cpu_cpuspeed_mpsafe(&cpuspeed);
> > > +		else if (cpu_cpuspeed) {
> > > +			KERNEL_LOCK();
> > > +			err = cpu_cpuspeed(&cpuspeed);
> > > +			KERNEL_UNLOCK();
> > > +		} else
> > > +			err = EOPNOTSUPP;
> > > +
> > > +		if (err)
> > > +			return (err);
> > > +		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
> > > +	}
> > >  	case HW_DISKNAMES:
> > >  	case HW_DISKSTATS:
> > > -	case HW_CPUSPEED:
> > >  #ifndef	SMALL_KERNEL
> > >  	case HW_SENSORS:
> > >  	case HW_SETPERF:
> > > @@ -911,7 +927,7 @@ int
> > >  hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> > >      void *newp, size_t newlen, struct proc *p)
> > >  {
> > > -	int err, cpuspeed;
> > > +	int err;
> > >  
> > >  	switch (name[0]) {
> > >  	case HW_DISKNAMES:
> > > @@ -929,13 +945,6 @@ hw_sysctl_locked(int *name, u_int namele
> > >  			return err;
> > >  		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
> > >  		    disk_count * sizeof(struct diskstats)));
> > > -	case HW_CPUSPEED:
> > > -		if (!cpu_cpuspeed)
> > > -			return (EOPNOTSUPP);
> > > -		err = cpu_cpuspeed(&cpuspeed);
> > > -		if (err)
> > > -			return err;
> > > -		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
> > >  #ifndef SMALL_KERNEL
> > >  	case HW_SENSORS:
> > >  		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
> > > Index: sys/sys/sysctl.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/sysctl.h,v
> > > retrieving revision 1.242
> > > diff -u -p -r1.242 sysctl.h
> > > --- sys/sys/sysctl.h	29 Apr 2025 02:24:32 -0000	1.242
> > > +++ sys/sys/sysctl.h	20 May 2025 21:30:49 -0000
> > > @@ -1084,6 +1084,7 @@ int sysctl_sysvipc(int *, u_int, void *,
> > >  int sysctl_wdog(int *, u_int, void *, size_t *, void *, size_t);
> > >  
> > >  extern int (*cpu_cpuspeed)(int *);
> > > +extern int (*cpu_cpuspeed_mpsafe)(int *);
> > >  extern void (*cpu_setperf)(int);
> > >  
> > >  int net_ifiq_sysctl(int *, u_int, void *, size_t *, void *, size_t);
> > 
> > -- 
> > wbr, Kirill
> > 
> 
> -- 
> :wq Claudio
> 
>