Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: nvme(4) sensors
To:
Jonathan Matthew <jonathan@d14n.org>
Cc:
tech@openbsd.org
Date:
Thu, 11 Jul 2024 16:03:32 +0200

Download raw body.

Thread
> Date: Thu, 11 Jul 2024 15:49:27 +0200
> From: Jonathan Matthew <jonathan@d14n.org>
> 
> This adds a basic set of sensors for nvme(4) showing device temperature
> and overall health.

The diff doesn't compile on arm64.  Adding #include <sys/sensors.h> to
dev/ic/nvmevar.h fixes that.

> It looks like this:
> 
> $ sysctl hw.sensors.nvme0
> hw.sensors.nvme0.temp0=42.85 degC, OK
> hw.sensors.nvme0.percent0=0.00% (endurance used), OK
> hw.sensors.nvme0.percent1=100.00% (available spare), OK
> 
> If the temperature exceeds the device's threshold, temp0 status changes
> to critical, and if the available spare capacity falls below the device's
> threshold, percent1 status changes to critical.
> 
> The nvme features used here have been mandatory since version 1.0 of
> the specification, so it's reasonable to just assume they're available.



hw.sensors.nvme0.temp0=38.85 degC, OK
hw.sensors.nvme0.percent0=0.00% (endurance used), OK
hw.sensors.nvme0.percent1=100.00% (available spare), OK

> Do the sensor names make sense?  Is refreshing them once per minute enough?

Probably.  If we want the hardware to self-protect, we may need to
poll more often, or use an interrupt if the hardware supports it.

ok kettenis@ with the include issue fixed.

> diff refs/heads/master refs/heads/nvmesensors
> commit - c0b7aa147b16eeebb8c9dc6debf303af3c74b7d5
> commit + c2dfadfa63c1492fba8b60f352c0b10245d0b842
> blob - dcbf40187741abb9fb5614d19085336bdf5ca7c0
> blob + a479a5f410d9e1cc5747dab44c56ad56fd9879c6
> --- sys/dev/ic/nvme.c
> +++ sys/dev/ic/nvme.c
> @@ -60,6 +60,10 @@ void	nvme_dumpregs(struct nvme_softc *);
>  int	nvme_identify(struct nvme_softc *, u_int);
>  void	nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
>  
> +#ifndef SMALL_KERNEL
> +void	nvme_refresh_sensors(void *);
> +#endif
> +
>  int	nvme_ccbs_alloc(struct nvme_softc *, u_int);
>  void	nvme_ccbs_free(struct nvme_softc *, u_int);
>  
> @@ -158,6 +162,7 @@ static const struct nvme_ops nvme_ops = {
>  #define NVME_TIMO_QOP			5000	/* ms to create/delete queue */
>  #define NVME_TIMO_PT			5000	/* ms to complete passthrough */
>  #define NVME_TIMO_IDENT			10000	/* ms to probe/identify */
> +#define NVME_TIMO_LOG_PAGE		5000	/* ms to read log pages */
>  #define NVME_TIMO_DELAYNS		10	/* ns to delay() in poll loop */
>  
>  /*
> @@ -407,6 +412,31 @@ nvme_attach(struct nvme_softc *sc)
>  	saa.saa_quirks = saa.saa_flags = 0;
>  	saa.saa_wwpn = saa.saa_wwnn = 0;
>  
> +	strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname));
> +
> +#ifndef SMALL_KERNEL
> +	sc->sc_temp_sensor.type = SENSOR_TEMP;
> +	sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
> +	sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor);
> +
> +	sc->sc_usage_sensor.type = SENSOR_PERCENT;
> +	sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
> +	strlcpy(sc->sc_usage_sensor.desc, "endurance used",
> +	    sizeof(sc->sc_usage_sensor.desc));
> +	sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor);
> +
> +	sc->sc_spare_sensor.type = SENSOR_PERCENT;
> +	sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
> +	strlcpy(sc->sc_spare_sensor.desc, "available spare",
> +	    sizeof(sc->sc_spare_sensor.desc));
> +	sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor);
> +
> +	if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL)
> +		goto free_q;
> +
> +	sensordev_install(&sc->sc_sensordev);
> +#endif
> +
>  	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
>  	    &saa, scsiprint);
>  #if NBIO > 0
> @@ -2128,3 +2158,67 @@ nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_di
>  	return 0;
>  }
>  #endif	/* NBIO > 0 */
> +
> +#ifndef SMALL_KERNEL
> +void
> +nvme_refresh_sensors(void *arg)
> +{
> +	struct nvme_softc 		*sc = arg;
> +	struct nvme_sqe			 sqe;
> +	struct nvme_dmamem		*mem = NULL;
> +	struct nvme_ccb			*ccb = NULL;
> +	struct nvm_smart_health 	*health;
> +	uint32_t			 dwlen;
> +	uint8_t 			 cw;
> +	int				 flags;
> +	int				 rv = 0;
> +
> +	ccb = nvme_ccb_get(sc);
> +	if (ccb == NULL)
> +		panic("nvme_refresh_sensors: nvme_ccb_get returned NULL");
> +
> +	mem = nvme_dmamem_alloc(sc, sizeof(*health));
> +	if (mem == NULL) {
> +		rv = ENOMEM;
> +		goto done;
> +	}
> +	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
> +
> +	dwlen = (sizeof(*health) >> 2) - 1;
> +	memset(&sqe, 0, sizeof(sqe));
> +	sqe.opcode = NVM_ADMIN_GET_LOG_PG;
> +	htolem32(&sqe.nsid, 0xffffffff);
> +	htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH));
> +	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
> +
> +	ccb->ccb_done = nvme_empty_done;
> +	ccb->ccb_cookie = &sqe;
> +	flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_LOG_PAGE);
> +
> +	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
> +
> +	if (flags != 0) {
> +		sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
> +		sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
> +		sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
> +	} else {
> +		health = NVME_DMA_KVA(mem); 
> +		cw = health->critical_warning;
> +
> +		sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
> +		    SENSOR_S_CRIT : SENSOR_S_OK;
> +		sc->sc_temp_sensor.value = health->temperature * 1000000;
> +
> +		sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
> +		    SENSOR_S_CRIT : SENSOR_S_OK;
> +		sc->sc_spare_sensor.value = health->avail_spare * 1000;
> +
> +		sc->sc_usage_sensor.status = SENSOR_S_OK;
> +		sc->sc_usage_sensor.value = health->percent_used * 1000;
> +	}
> + done:
> +	if (mem != NULL)
> +		nvme_dmamem_free(sc, mem);
> +	nvme_ccb_put(sc, ccb);
> +}
> +#endif /* SMALL_KERNEL */
> blob - 2a28c6af83ef74fce326515381d774767fd3152d
> blob + 7ed5292bc2d6fff0ab4bf0a155d634930a6a255d
> --- sys/dev/ic/nvmereg.h
> +++ sys/dev/ic/nvmereg.h
> @@ -415,3 +415,41 @@ struct nvm_identify_namespace {
>  
>  	u_int8_t	vs[3712];
>  } __packed __aligned(8);
> +
> +#define NVM_LOG_PAGE_SMART_HEALTH	0x02
> +struct nvm_smart_health {
> +	u_int8_t	critical_warning;
> +#define NVM_HEALTH_CW_SPARE		(1 << 0)
> +#define NVM_HEALTH_CW_TEMP		(1 << 1)
> +#define NVM_HEALTH_CW_MEDIA		(1 << 2)
> +#define NVM_HEALTH_CW_READONLY		(1 << 3)
> +#define NVM_HEALTH_CW_VOLATILE		(1 << 4)
> +#define NVM_HEALTH_CW_PMR		(1 << 5)
> +	u_int16_t	temperature;
> +	u_int8_t	avail_spare;
> +	u_int8_t	avail_spare_threshold;
> +	u_int8_t	percent_used;
> +	u_int8_t	end_grp_summary;	/* 1.4+ */
> +
> +	u_int8_t	_reserved1[25];
> +
> +	u_int64_t	data_units_read[2];
> +	u_int64_t	data_units_written[2];
> +	u_int64_t	host_read_commands[2];
> +	u_int64_t	host_write_commands[2];
> +	u_int64_t	busy_time[2];
> +	u_int64_t	power_cycles[2];
> +	u_int64_t	power_on_hours[2];
> +	u_int64_t	unsafe_shutdowns[2];
> +	u_int64_t	integrity_errors[2];
> +	u_int64_t	error_log_entries[2];
> +	u_int32_t	warn_temp_time;		/* 1.2+ */
> +	u_int32_t	crit_temp_time;		/* 1.2+ */
> +	u_int16_t	temp_sensors[8];	/* 1.2+ */
> +	u_int32_t	therm_mgmt_count_1;	/* 1.3+ */
> +	u_int32_t	therm_mgmt_count_2;	/* 1.3+ */
> +	u_int32_t	therm_mgmt_time_1;	/* 1.3+ */
> +	u_int32_t	therm_mgmt_time_2;	/* 1.3+ */
> +	
> +	u_int8_t	_reserved2[280];
> +} __packed __aligned(8);
> blob - 15137057818e377544126b710de0c09a2f6b6541
> blob + 78f47ddd42f2e2b5166609b12160351a60f0c911
> --- sys/dev/ic/nvmevar.h
> +++ sys/dev/ic/nvmevar.h
> @@ -126,6 +126,11 @@ struct nvme_softc {
>  	struct scsi_iopool	sc_iopool;
>  	struct rwlock		sc_lock;
>  	struct scsibus_softc	*sc_scsibus;
> +
> +	struct ksensordev	sc_sensordev;
> +	struct ksensor		sc_temp_sensor;
> +	struct ksensor		sc_spare_sensor;
> +	struct ksensor		sc_usage_sensor;
>  };
>  
>  #define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)
> 
>