Download raw body.
nvme(4) sensors
Jonathan Matthew <jonathan@d14n.org> writes:
> This adds a basic set of sensors for nvme(4) showing device temperature
> and overall health.
>
> It looks like this:
>
> $ sysctl hw.sensors.nvme0
> hw.sensors.nvme0.temp0=42.85 degC, OK
> hw.sensors.nvme0.percent0=0.00% (endurance used), OK
> hw.sensors.nvme0.percent1=100.00% (available spare), OK
Oh nice, I was looking at something similar this week! Some feedback
below.
>
> If the temperature exceeds the device's threshold, temp0 status changes
> to critical, and if the available spare capacity falls below the device's
> threshold, percent1 status changes to critical.
I believe the threshold can also be exposed, but it has to be read via
a Get Features admin command. I have a diff for that which we can add
later. It would be helpful to know what the device considers its
warning and critical points... but from what I've seen on the two
devices I've tested this week, the critical points on my disks are
nonsensical (>300 C) while the warning ones seem good (~80 C).
>
> The nvme features used here have been mandatory since version 1.0 of
> the specification, so it's reasonable to just assume they're available.
>
> Do the sensor names make sense? Is refreshing them once per minute enough?
>
I've observed that the composite temperature reading can change rapidly,
within only a few seconds, under load. It probably makes more sense to
refresh the sensors more often than once per minute.
>
> diff refs/heads/master refs/heads/nvmesensors
> commit - c0b7aa147b16eeebb8c9dc6debf303af3c74b7d5
> commit + c2dfadfa63c1492fba8b60f352c0b10245d0b842
> blob - dcbf40187741abb9fb5614d19085336bdf5ca7c0
> blob + a479a5f410d9e1cc5747dab44c56ad56fd9879c6
> --- sys/dev/ic/nvme.c
> +++ sys/dev/ic/nvme.c
> @@ -60,6 +60,10 @@ void nvme_dumpregs(struct nvme_softc *);
> int nvme_identify(struct nvme_softc *, u_int);
> void nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
>
> +#ifndef SMALL_KERNEL
> +void nvme_refresh_sensors(void *);
> +#endif
> +
> int nvme_ccbs_alloc(struct nvme_softc *, u_int);
> void nvme_ccbs_free(struct nvme_softc *, u_int);
>
> @@ -158,6 +162,7 @@ static const struct nvme_ops nvme_ops = {
> #define NVME_TIMO_QOP 5000 /* ms to create/delete queue */
> #define NVME_TIMO_PT 5000 /* ms to complete passthrough */
> #define NVME_TIMO_IDENT 10000 /* ms to probe/identify */
> +#define NVME_TIMO_LOG_PAGE 5000 /* ms to read log pages */
> #define NVME_TIMO_DELAYNS 10 /* ns to delay() in poll loop */
>
> /*
> @@ -407,6 +412,31 @@ nvme_attach(struct nvme_softc *sc)
> saa.saa_quirks = saa.saa_flags = 0;
> saa.saa_wwpn = saa.saa_wwnn = 0;
>
> + strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname));
> +
> +#ifndef SMALL_KERNEL
> + sc->sc_temp_sensor.type = SENSOR_TEMP;
> + sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
> + sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor);
> +
> + sc->sc_usage_sensor.type = SENSOR_PERCENT;
> + sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
> + strlcpy(sc->sc_usage_sensor.desc, "endurance used",
> + sizeof(sc->sc_usage_sensor.desc));
> + sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor);
> +
> + sc->sc_spare_sensor.type = SENSOR_PERCENT;
> + sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
> + strlcpy(sc->sc_spare_sensor.desc, "available spare",
> + sizeof(sc->sc_spare_sensor.desc));
> + sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor);
> +
> + if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL)
> + goto free_q;
> +
> + sensordev_install(&sc->sc_sensordev);
> +#endif
> +
> sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
> &saa, scsiprint);
> #if NBIO > 0
> @@ -2128,3 +2158,67 @@ nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_di
> return 0;
> }
> #endif /* NBIO > 0 */
> +
> +#ifndef SMALL_KERNEL
> +void
> +nvme_refresh_sensors(void *arg)
> +{
> + struct nvme_softc *sc = arg;
> + struct nvme_sqe sqe;
> + struct nvme_dmamem *mem = NULL;
> + struct nvme_ccb *ccb = NULL;
> + struct nvm_smart_health *health;
> + uint32_t dwlen;
> + uint8_t cw;
> + int flags;
> + int rv = 0;
> +
> + ccb = nvme_ccb_get(sc);
> + if (ccb == NULL)
> + panic("nvme_refresh_sensors: nvme_ccb_get returned NULL");
I'm confident this will panic on my Rock5b. If we can't get a ccb, I'd
say we should just return. I say this because I was playing with using
the sensors framework to look at the temperature of the nvme in the
Rock5b, and I hit that panic on most initial boots :)
> +
> + mem = nvme_dmamem_alloc(sc, sizeof(*health));
> + if (mem == NULL) {
> + rv = ENOMEM;
> + goto done;
> + }
> + nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
> +
> + dwlen = (sizeof(*health) >> 2) - 1;
> + memset(&sqe, 0, sizeof(sqe));
> + sqe.opcode = NVM_ADMIN_GET_LOG_PG;
> + htolem32(&sqe.nsid, 0xffffffff);
> + htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH));
> + htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
> +
> + ccb->ccb_done = nvme_empty_done;
> + ccb->ccb_cookie = &sqe;
> + flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_LOG_PAGE);
> +
> + nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
> +
> + if (flags != 0) {
> + sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
> + sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
> + sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
> + } else {
> + health = NVME_DMA_KVA(mem);
> + cw = health->critical_warning;
> +
> + sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
> + SENSOR_S_CRIT : SENSOR_S_OK;
> + sc->sc_temp_sensor.value = health->temperature * 1000000;
> +
> + sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
> + SENSOR_S_CRIT : SENSOR_S_OK;
> + sc->sc_spare_sensor.value = health->avail_spare * 1000;
> +
> + sc->sc_usage_sensor.status = SENSOR_S_OK;
> + sc->sc_usage_sensor.value = health->percent_used * 1000;
> + }
> + done:
> + if (mem != NULL)
> + nvme_dmamem_free(sc, mem);
> + nvme_ccb_put(sc, ccb);
> +}
> +#endif /* SMALL_KERNEL */
> blob - 2a28c6af83ef74fce326515381d774767fd3152d
> blob + 7ed5292bc2d6fff0ab4bf0a155d634930a6a255d
> --- sys/dev/ic/nvmereg.h
> +++ sys/dev/ic/nvmereg.h
> @@ -415,3 +415,41 @@ struct nvm_identify_namespace {
>
> u_int8_t vs[3712];
> } __packed __aligned(8);
> +
> +#define NVM_LOG_PAGE_SMART_HEALTH 0x02
> +struct nvm_smart_health {
> + u_int8_t critical_warning;
> +#define NVM_HEALTH_CW_SPARE (1 << 0)
> +#define NVM_HEALTH_CW_TEMP (1 << 1)
> +#define NVM_HEALTH_CW_MEDIA (1 << 2)
> +#define NVM_HEALTH_CW_READONLY (1 << 3)
> +#define NVM_HEALTH_CW_VOLATILE (1 << 4)
> +#define NVM_HEALTH_CW_PMR (1 << 5)
> + u_int16_t temperature;
> + u_int8_t avail_spare;
> + u_int8_t avail_spare_threshold;
> + u_int8_t percent_used;
> + u_int8_t end_grp_summary; /* 1.4+ */
> +
> + u_int8_t _reserved1[25];
> +
> + u_int64_t data_units_read[2];
> + u_int64_t data_units_written[2];
> + u_int64_t host_read_commands[2];
> + u_int64_t host_write_commands[2];
> + u_int64_t busy_time[2];
> + u_int64_t power_cycles[2];
> + u_int64_t power_on_hours[2];
> + u_int64_t unsafe_shutdowns[2];
> + u_int64_t integrity_errors[2];
> + u_int64_t error_log_entries[2];
> + u_int32_t warn_temp_time; /* 1.2+ */
> + u_int32_t crit_temp_time; /* 1.2+ */
> + u_int16_t temp_sensors[8]; /* 1.2+ */
> + u_int32_t therm_mgmt_count_1; /* 1.3+ */
> + u_int32_t therm_mgmt_count_2; /* 1.3+ */
> + u_int32_t therm_mgmt_time_1; /* 1.3+ */
> + u_int32_t therm_mgmt_time_2; /* 1.3+ */
> +
> + u_int8_t _reserved2[280];
> +} __packed __aligned(8);
> blob - 15137057818e377544126b710de0c09a2f6b6541
> blob + 78f47ddd42f2e2b5166609b12160351a60f0c911
> --- sys/dev/ic/nvmevar.h
> +++ sys/dev/ic/nvmevar.h
> @@ -126,6 +126,11 @@ struct nvme_softc {
> struct scsi_iopool sc_iopool;
> struct rwlock sc_lock;
> struct scsibus_softc *sc_scsibus;
> +
> + struct ksensordev sc_sensordev;
> + struct ksensor sc_temp_sensor;
> + struct ksensor sc_spare_sensor;
> + struct ksensor sc_usage_sensor;
> };
>
> #define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)
nvme(4) sensors