Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: sysctl: make amd64 HW_CPUSPEED mp-safe
To:
tedu@openbsd.org, tech@openbsd.org
Date:
Wed, 21 May 2025 13:39:42 +0000

Download raw body.

Thread
On Wed, May 21, 2025 at 01:18:08PM +0200, Claudio Jeker wrote:
> On Wed, May 21, 2025 at 12:14:31PM +0200, Kirill A. Korinsky wrote:
> > On Tue, 20 May 2025 23:49:23 +0200,
> > Vitaliy Makkoveev <mvs@openbsd.org> wrote:
> > > 
> > > Since we are here and at least I do hw.cpuspeed measurements, I like to
> > > make it mp-safe. Introduce temporary `cpu_cpuspeed_mpsafe' handler
> > > pointer and call it lockless if it is set. Kernel lock is pretty enough
> > > to call old `cpu_cpuspeed' handler, so we avoid `sysctl_lock'
> > > acquisition and useless pages wiring. The rest architectures can be
> > > adjusted separately.
> > > 
> > > ok?
> > >
> > 
> > Make sense and tested on amd64.
> > 
> > FWIW ok kirill@
> 
> Is there a way that does not introduce yet another API?
> This feels like one of those changes where cpu_cpuspeed will remain
> forever and cpu_cpuspeed_mpsafe will see no additional usage.
> 

The only way is to push kernel lock down to `cpu_cpuspeed' handlers.

> > > Index: sys/arch/amd64/amd64/identcpu.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
> > > retrieving revision 1.150
> > > diff -u -p -r1.150 identcpu.c
> > > --- sys/arch/amd64/amd64/identcpu.c	29 Apr 2025 20:19:48 -0000	1.150
> > > +++ sys/arch/amd64/amd64/identcpu.c	20 May 2025 21:30:47 -0000
> > > @@ -61,9 +61,14 @@ void	tsc_timecounter_init(struct cpu_inf
> > >  void	cpu_check_vmm_cap(struct cpu_info *);
> > >  #endif /* NVMM > 0 */
> > >  
> > > +/*
> > > + * Locks used to protect data:
> > > + *	a	atomic
> > > + */
> > > +
> > >  /* sysctl wants this. */
> > >  char cpu_model[48];
> > > -int cpuspeed;
> > > +int cpuspeed;			/* [a] */
> > >  
> > >  int amd64_has_xcrypt;
> > >  int amd64_pos_cbit;	/* C bit position for SEV */
> > > @@ -74,7 +79,7 @@ int has_rdseed;
> > >  int
> > >  cpu_amd64speed(int *freq)
> > >  {
> > > -	*freq = cpuspeed;
> > > +	*freq = atomic_load_int(&cpuspeed);
> > >  	return (0);
> > >  }
> > >  
> > > @@ -620,7 +625,7 @@ identifycpu(struct cpu_info *ci)
> > >  
> > >  	if (CPU_IS_PRIMARY(ci)) {
> > >  		cpuspeed = (freq + 4999) / 1000000;
> > > -		cpu_cpuspeed = cpu_amd64speed;
> > > +		cpu_cpuspeed_mpsafe = cpu_amd64speed;
> > >  	}
> > >  
> > >  	printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
> > > Index: sys/arch/amd64/amd64/k1x-pstate.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/k1x-pstate.c,v
> > > retrieving revision 1.11
> > > diff -u -p -r1.11 k1x-pstate.c
> > > --- sys/arch/amd64/amd64/k1x-pstate.c	11 Aug 2021 18:31:48 -0000	1.11
> > > +++ sys/arch/amd64/amd64/k1x-pstate.c	20 May 2025 21:30:48 -0000
> > > @@ -26,6 +26,7 @@
> > >  
> > >  #include <sys/param.h>
> > >  #include <sys/systm.h>
> > > +#include <sys/atomic.h>
> > >  #include <sys/malloc.h>
> > >  #include <sys/sysctl.h>
> > >  
> > > @@ -106,7 +107,7 @@ k1x_transition(struct k1x_cpu_state *cst
> > >  		DELAY(100);
> > >  	}
> > >  	if (cfid == fid) {
> > > -		cpuspeed = cstate->state_table[level].freq;
> > > +		atomic_store_int(&cpuspeed, cstate->state_table[level].freq);
> > >  #if 0
> > >  		(void)printf("Target: %d Current: %d Pstate: %d\n",
> > >  		    cstate->state_table[level].freq,
> > > Index: sys/arch/amd64/amd64/powernow-k8.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/arch/amd64/amd64/powernow-k8.c,v
> > > retrieving revision 1.29
> > > diff -u -p -r1.29 powernow-k8.c
> > > --- sys/arch/amd64/amd64/powernow-k8.c	21 Feb 2022 08:16:08 -0000	1.29
> > > +++ sys/arch/amd64/amd64/powernow-k8.c	20 May 2025 21:30:48 -0000
> > > @@ -28,6 +28,7 @@
> > >  
> > >  #include <sys/param.h>
> > >  #include <sys/systm.h>
> > > +#include <sys/atomic.h>
> > >  #include <sys/malloc.h>
> > >  #include <sys/sysctl.h>
> > >  
> > > @@ -271,7 +272,7 @@ k8pnow_transition(struct k8pnow_cpu_stat
> > >  	}
> > >  
> > >  	if (cfid == fid || cvid == vid)
> > > -		cpuspeed = cstate->state_table[level].freq;
> > > +		atomic_store_int(&cpuspeed, cstate->state_table[level].freq);
> > >  }
> > >  
> > >  /*
> > > Index: sys/kern/kern_sysctl.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> > > retrieving revision 1.468
> > > diff -u -p -r1.468 kern_sysctl.c
> > > --- sys/kern/kern_sysctl.c	9 May 2025 14:53:22 -0000	1.468
> > > +++ sys/kern/kern_sysctl.c	20 May 2025 21:30:49 -0000
> > > @@ -171,6 +171,7 @@ int hw_sysctl_locked(int *, u_int, void 
> > >  	struct proc *);
> > >  
> > >  int (*cpu_cpuspeed)(int *);
> > > +int (*cpu_cpuspeed_mpsafe)(int *);
> > >  
> > >  /*
> > >   * Lock to avoid too many processes vslocking a large amount of memory
> > > @@ -844,9 +845,24 @@ hw_sysctl(int *name, u_int namelen, void
> > >  	case HW_USERMEM:
> > >  		return (sysctl_rdint(oldp, oldlenp, newp,
> > >  		    ptoa(physmem - uvmexp.wired)));
> > > +	case HW_CPUSPEED: {
> > > +		int cpuspeed;
> > > +
> > > +		if (cpu_cpuspeed_mpsafe)
> > > +			err = cpu_cpuspeed_mpsafe(&cpuspeed);
> > > +		else if (cpu_cpuspeed) {
> > > +			KERNEL_LOCK();
> > > +			err = cpu_cpuspeed(&cpuspeed);
> > > +			KERNEL_UNLOCK();
> > > +		} else
> > > +			err = EOPNOTSUPP;
> > > +
> > > +		if (err)
> > > +			return (err);
> > > +		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
> > > +	}
> > >  	case HW_DISKNAMES:
> > >  	case HW_DISKSTATS:
> > > -	case HW_CPUSPEED:
> > >  #ifndef	SMALL_KERNEL
> > >  	case HW_SENSORS:
> > >  	case HW_SETPERF:
> > > @@ -911,7 +927,7 @@ int
> > >  hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> > >      void *newp, size_t newlen, struct proc *p)
> > >  {
> > > -	int err, cpuspeed;
> > > +	int err;
> > >  
> > >  	switch (name[0]) {
> > >  	case HW_DISKNAMES:
> > > @@ -929,13 +945,6 @@ hw_sysctl_locked(int *name, u_int namele
> > >  			return err;
> > >  		return (sysctl_rdstruct(oldp, oldlenp, newp, diskstats,
> > >  		    disk_count * sizeof(struct diskstats)));
> > > -	case HW_CPUSPEED:
> > > -		if (!cpu_cpuspeed)
> > > -			return (EOPNOTSUPP);
> > > -		err = cpu_cpuspeed(&cpuspeed);
> > > -		if (err)
> > > -			return err;
> > > -		return (sysctl_rdint(oldp, oldlenp, newp, cpuspeed));
> > >  #ifndef SMALL_KERNEL
> > >  	case HW_SENSORS:
> > >  		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
> > > Index: sys/sys/sysctl.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/sysctl.h,v
> > > retrieving revision 1.242
> > > diff -u -p -r1.242 sysctl.h
> > > --- sys/sys/sysctl.h	29 Apr 2025 02:24:32 -0000	1.242
> > > +++ sys/sys/sysctl.h	20 May 2025 21:30:49 -0000
> > > @@ -1084,6 +1084,7 @@ int sysctl_sysvipc(int *, u_int, void *,
> > >  int sysctl_wdog(int *, u_int, void *, size_t *, void *, size_t);
> > >  
> > >  extern int (*cpu_cpuspeed)(int *);
> > > +extern int (*cpu_cpuspeed_mpsafe)(int *);
> > >  extern void (*cpu_setperf)(int);
> > >  
> > >  int net_ifiq_sysctl(int *, u_int, void *, size_t *, void *, size_t);
> > 
> > -- 
> > wbr, Kirill
> > 
> 
> -- 
> :wq Claudio