From: Mark Kettenis Subject: Re: nvme(4) sensors To: Jonathan Matthew Cc: tech@openbsd.org Date: Thu, 11 Jul 2024 16:03:32 +0200 > Date: Thu, 11 Jul 2024 15:49:27 +0200 > From: Jonathan Matthew > > This adds a basic set of sensors for nvme(4) showing device temperature > and overall health. Diff doesn't compile on arm64. Adding #include <sys/sensors.h> to dev/ic/nvmevar.h fixes that. > It looks like this: > > $ sysctl hw.sensors.nvme0 > hw.sensors.nvme0.temp0=42.85 degC, OK > hw.sensors.nvme0.percent0=0.00% (endurance used), OK > hw.sensors.nvme0.percent1=100.00% (available spare), OK > > If the temperature exceeds the device's threshold, temp0 status changes > to critical, and if the available spare capacity falls below the device's > threshold, percent1 status changes to critical. > > The nvme features used here have been mandatory since version 1.0 of > the specification, so it's reasonable to just assume they're available. hw.sensors.nvme0.temp0=38.85 degC, OK hw.sensors.nvme0.percent0=0.00% (endurance used), OK hw.sensors.nvme0.percent1=100.00% (available spare), OK > Do the sensor names make sense? Is refreshing them once per minute enough? Probably. Maybe if we want the hardware to self-protect we'd need to poll more often. Or use an interrupt if the hardware supports it. ok kettenis@ with the include issue fixed. 
> diff refs/heads/master refs/heads/nvmesensors > commit - c0b7aa147b16eeebb8c9dc6debf303af3c74b7d5 > commit + c2dfadfa63c1492fba8b60f352c0b10245d0b842 > blob - dcbf40187741abb9fb5614d19085336bdf5ca7c0 > blob + a479a5f410d9e1cc5747dab44c56ad56fd9879c6 > --- sys/dev/ic/nvme.c > +++ sys/dev/ic/nvme.c > @@ -60,6 +60,10 @@ void nvme_dumpregs(struct nvme_softc *); > int nvme_identify(struct nvme_softc *, u_int); > void nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *); > > +#ifndef SMALL_KERNEL > +void nvme_refresh_sensors(void *); > +#endif > + > int nvme_ccbs_alloc(struct nvme_softc *, u_int); > void nvme_ccbs_free(struct nvme_softc *, u_int); > > @@ -158,6 +162,7 @@ static const struct nvme_ops nvme_ops = { > #define NVME_TIMO_QOP 5000 /* ms to create/delete queue */ > #define NVME_TIMO_PT 5000 /* ms to complete passthrough */ > #define NVME_TIMO_IDENT 10000 /* ms to probe/identify */ > +#define NVME_TIMO_LOG_PAGE 5000 /* ms to read log pages */ > #define NVME_TIMO_DELAYNS 10 /* ns to delay() in poll loop */ > > /* > @@ -407,6 +412,31 @@ nvme_attach(struct nvme_softc *sc) > saa.saa_quirks = saa.saa_flags = 0; > saa.saa_wwpn = saa.saa_wwnn = 0; > > + strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname)); > + > +#ifndef SMALL_KERNEL > + sc->sc_temp_sensor.type = SENSOR_TEMP; > + sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN; > + sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor); > + > + sc->sc_usage_sensor.type = SENSOR_PERCENT; > + sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN; > + strlcpy(sc->sc_usage_sensor.desc, "endurance used", > + sizeof(sc->sc_usage_sensor.desc)); > + sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor); > + > + sc->sc_spare_sensor.type = SENSOR_PERCENT; > + sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN; > + strlcpy(sc->sc_spare_sensor.desc, "available spare", > + sizeof(sc->sc_spare_sensor.desc)); > + sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor); > + > + if (sensor_task_register(sc, 
nvme_refresh_sensors, 60) == NULL) > + goto free_q; > + > + sensordev_install(&sc->sc_sensordev); > +#endif > + > sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, > &saa, scsiprint); > #if NBIO > 0 > @@ -2128,3 +2158,67 @@ nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_di > return 0; > } > #endif /* NBIO > 0 */ > + > +#ifndef SMALL_KERNEL > +void > +nvme_refresh_sensors(void *arg) > +{ > + struct nvme_softc *sc = arg; > + struct nvme_sqe sqe; > + struct nvme_dmamem *mem = NULL; > + struct nvme_ccb *ccb = NULL; > + struct nvm_smart_health *health; > + uint32_t dwlen; > + uint8_t cw; > + int flags; > + int rv = 0; > + > + ccb = nvme_ccb_get(sc); > + if (ccb == NULL) > + panic("nvme_refresh_sensors: nvme_ccb_get returned NULL"); > + > + mem = nvme_dmamem_alloc(sc, sizeof(*health)); > + if (mem == NULL) { > + rv = ENOMEM; > + goto done; > + } > + nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); > + > + dwlen = (sizeof(*health) >> 2) - 1; > + memset(&sqe, 0, sizeof(sqe)); > + sqe.opcode = NVM_ADMIN_GET_LOG_PG; > + htolem32(&sqe.nsid, 0xffffffff); > + htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH)); > + htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); > + > + ccb->ccb_done = nvme_empty_done; > + ccb->ccb_cookie = &sqe; > + flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_LOG_PAGE); > + > + nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); > + > + if (flags != 0) { > + sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN; > + sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN; > + sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN; > + } else { > + health = NVME_DMA_KVA(mem); > + cw = health->critical_warning; > + > + sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ? > + SENSOR_S_CRIT : SENSOR_S_OK; > + sc->sc_temp_sensor.value = health->temperature * 1000000; > + > + sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ? 
> + SENSOR_S_CRIT : SENSOR_S_OK; > + sc->sc_spare_sensor.value = health->avail_spare * 1000; > + > + sc->sc_usage_sensor.status = SENSOR_S_OK; > + sc->sc_usage_sensor.value = health->percent_used * 1000; > + } > + done: > + if (mem != NULL) > + nvme_dmamem_free(sc, mem); > + nvme_ccb_put(sc, ccb); > +} > +#endif /* SMALL_KERNEL */ > blob - 2a28c6af83ef74fce326515381d774767fd3152d > blob + 7ed5292bc2d6fff0ab4bf0a155d634930a6a255d > --- sys/dev/ic/nvmereg.h > +++ sys/dev/ic/nvmereg.h > @@ -415,3 +415,41 @@ struct nvm_identify_namespace { > > u_int8_t vs[3712]; > } __packed __aligned(8); > + > +#define NVM_LOG_PAGE_SMART_HEALTH 0x02 > +struct nvm_smart_health { > + u_int8_t critical_warning; > +#define NVM_HEALTH_CW_SPARE (1 << 0) > +#define NVM_HEALTH_CW_TEMP (1 << 1) > +#define NVM_HEALTH_CW_MEDIA (1 << 2) > +#define NVM_HEALTH_CW_READONLY (1 << 3) > +#define NVM_HEALTH_CW_VOLATILE (1 << 4) > +#define NVM_HEALTH_CW_PMR (1 << 5) > + u_int16_t temperature; > + u_int8_t avail_spare; > + u_int8_t avail_spare_threshold; > + u_int8_t percent_used; > + u_int8_t end_grp_summary; /* 1.4+ */ > + > + u_int8_t _reserved1[25]; > + > + u_int64_t data_units_read[2]; > + u_int64_t data_units_written[2]; > + u_int64_t host_read_commands[2]; > + u_int64_t host_write_commands[2]; > + u_int64_t busy_time[2]; > + u_int64_t power_cycles[2]; > + u_int64_t power_on_hours[2]; > + u_int64_t unsafe_shutdowns[2]; > + u_int64_t integrity_errors[2]; > + u_int64_t error_log_entries[2]; > + u_int32_t warn_temp_time; /* 1.2+ */ > + u_int32_t crit_temp_time; /* 1.2+ */ > + u_int16_t temp_sensors[8]; /* 1.2+ */ > + u_int32_t therm_mgmt_count_1; /* 1.3+ */ > + u_int32_t therm_mgmt_count_2; /* 1.3+ */ > + u_int32_t therm_mgmt_time_1; /* 1.3+ */ > + u_int32_t therm_mgmt_time_2; /* 1.3+ */ > + > + u_int8_t _reserved2[280]; > +} __packed __aligned(8); > blob - 15137057818e377544126b710de0c09a2f6b6541 > blob + 78f47ddd42f2e2b5166609b12160351a60f0c911 > --- sys/dev/ic/nvmevar.h > +++ sys/dev/ic/nvmevar.h > 
@@ -126,6 +126,11 @@ struct nvme_softc { > struct scsi_iopool sc_iopool; > struct rwlock sc_lock; > struct scsibus_softc *sc_scsibus; > + > + struct ksensordev sc_sensordev; > + struct ksensor sc_temp_sensor; > + struct ksensor sc_spare_sensor; > + struct ksensor sc_usage_sensor; > }; > > #define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname) > >