Download raw body.
nvme(4) sensors
This adds a basic set of sensors for nvme(4) showing device temperature
and overall health.
It looks like this:
$ sysctl hw.sensors.nvme0
hw.sensors.nvme0.temp0=42.85 degC, OK
hw.sensors.nvme0.percent0=0.00% (endurance used), OK
hw.sensors.nvme0.percent1=100.00% (available spare), OK
If the temperature exceeds the device's threshold, temp0 status changes
to critical, and if the available spare capacity falls below the device's
threshold, percent1 status changes to critical.
The NVMe features used here (the Get Log Page admin command and the SMART /
Health Information log page) have been mandatory since version 1.0 of
the specification, so it's reasonable to just assume they're available.
Do the sensor names make sense? Is refreshing them once per minute enough?
diff refs/heads/master refs/heads/nvmesensors
commit - c0b7aa147b16eeebb8c9dc6debf303af3c74b7d5
commit + c2dfadfa63c1492fba8b60f352c0b10245d0b842
blob - dcbf40187741abb9fb5614d19085336bdf5ca7c0
blob + a479a5f410d9e1cc5747dab44c56ad56fd9879c6
--- sys/dev/ic/nvme.c
+++ sys/dev/ic/nvme.c
@@ -60,6 +60,10 @@ void nvme_dumpregs(struct nvme_softc *);
int nvme_identify(struct nvme_softc *, u_int);
void nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
+#ifndef SMALL_KERNEL
+void nvme_refresh_sensors(void *);
+#endif
+
int nvme_ccbs_alloc(struct nvme_softc *, u_int);
void nvme_ccbs_free(struct nvme_softc *, u_int);
@@ -158,6 +162,7 @@ static const struct nvme_ops nvme_ops = {
#define NVME_TIMO_QOP 5000 /* ms to create/delete queue */
#define NVME_TIMO_PT 5000 /* ms to complete passthrough */
#define NVME_TIMO_IDENT 10000 /* ms to probe/identify */
+#define NVME_TIMO_LOG_PAGE 5000 /* ms to read log pages */
#define NVME_TIMO_DELAYNS 10 /* ns to delay() in poll loop */
/*
@@ -407,6 +412,31 @@ nvme_attach(struct nvme_softc *sc)
saa.saa_quirks = saa.saa_flags = 0;
saa.saa_wwpn = saa.saa_wwnn = 0;
+ strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname));
+
+#ifndef SMALL_KERNEL
+ sc->sc_temp_sensor.type = SENSOR_TEMP;
+ sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
+ sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor);
+
+ sc->sc_usage_sensor.type = SENSOR_PERCENT;
+ sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
+ strlcpy(sc->sc_usage_sensor.desc, "endurance used",
+ sizeof(sc->sc_usage_sensor.desc));
+ sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor);
+
+ sc->sc_spare_sensor.type = SENSOR_PERCENT;
+ sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
+ strlcpy(sc->sc_spare_sensor.desc, "available spare",
+ sizeof(sc->sc_spare_sensor.desc));
+ sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor);
+
+ if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL)
+ goto free_q;
+
+ sensordev_install(&sc->sc_sensordev);
+#endif
+
sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
&saa, scsiprint);
#if NBIO > 0
@@ -2128,3 +2158,67 @@ nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_di
return 0;
}
#endif /* NBIO > 0 */
+
+#ifndef SMALL_KERNEL
+/*
+ * Sensor task callback: fetch the SMART / health log page from the
+ * controller with a polled Get Log Page admin command and update the
+ * temperature, available-spare and endurance sensors from it.
+ *
+ * arg is the nvme_softc that was passed to sensor_task_register().
+ *
+ * If the log page cannot be read for any reason (no free ccb, no DMA
+ * memory, command failure) all three sensors are marked
+ * SENSOR_S_UNKNOWN, so a stale reading is never reported as OK.
+ */
+void
+nvme_refresh_sensors(void *arg)
+{
+	struct nvme_softc *sc = arg;
+	struct nvme_sqe sqe;
+	struct nvme_dmamem *mem = NULL;
+	struct nvme_ccb *ccb;
+	struct nvm_smart_health *health;
+	uint32_t dwlen;
+	uint8_t cw;
+	int flags;
+	int64_t temp;
+
+	/* Being out of ccbs is not fatal; skip this refresh instead. */
+	ccb = nvme_ccb_get(sc);
+	if (ccb == NULL)
+		goto failed;
+
+	mem = nvme_dmamem_alloc(sc, sizeof(*health));
+	if (mem == NULL)
+		goto failed;
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
+
+	/* The transfer length is given in dwords, minus one. */
+	dwlen = (sizeof(*health) >> 2) - 1;
+	memset(&sqe, 0, sizeof(sqe));
+	sqe.opcode = NVM_ADMIN_GET_LOG_PG;
+	htolem32(&sqe.nsid, 0xffffffff);
+	htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH));
+	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
+
+	ccb->ccb_done = nvme_empty_done;
+	ccb->ccb_cookie = &sqe;
+	flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill,
+	    NVME_TIMO_LOG_PAGE);
+
+	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
+
+	if (flags != 0)
+		goto failed;
+
+	health = NVME_DMA_KVA(mem);
+	cw = health->critical_warning;
+
+	/*
+	 * The device stores the temperature little-endian.  Convert it
+	 * to host order and widen to 64 bits before scaling, so the
+	 * multiplication cannot overflow int.
+	 */
+	temp = letoh16(health->temperature);
+	sc->sc_temp_sensor.value = temp * 1000000;
+	sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
+	    SENSOR_S_CRIT : SENSOR_S_OK;
+
+	sc->sc_spare_sensor.value = health->avail_spare * 1000;
+	sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
+	    SENSOR_S_CRIT : SENSOR_S_OK;
+
+	sc->sc_usage_sensor.value = health->percent_used * 1000;
+	sc->sc_usage_sensor.status = SENSOR_S_OK;
+	goto done;
+
+ failed:
+	sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
+	sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
+	sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
+ done:
+	if (mem != NULL)
+		nvme_dmamem_free(sc, mem);
+	if (ccb != NULL)
+		nvme_ccb_put(sc, ccb);
+}
+#endif /* SMALL_KERNEL */
blob - 2a28c6af83ef74fce326515381d774767fd3152d
blob + 7ed5292bc2d6fff0ab4bf0a155d634930a6a255d
--- sys/dev/ic/nvmereg.h
+++ sys/dev/ic/nvmereg.h
@@ -415,3 +415,41 @@ struct nvm_identify_namespace {
u_int8_t vs[3712];
} __packed __aligned(8);
+
+#define NVM_LOG_PAGE_SMART_HEALTH 0x02 /* log page id for Get Log Page */
+struct nvm_smart_health { /* SMART / health log page; fields total 512 bytes */
+ u_int8_t critical_warning; /* bitmask of NVM_HEALTH_CW_* */
+#define NVM_HEALTH_CW_SPARE (1 << 0) /* available spare below threshold */
+#define NVM_HEALTH_CW_TEMP (1 << 1) /* temperature threshold exceeded */
+#define NVM_HEALTH_CW_MEDIA (1 << 2)
+#define NVM_HEALTH_CW_READONLY (1 << 3)
+#define NVM_HEALTH_CW_VOLATILE (1 << 4)
+#define NVM_HEALTH_CW_PMR (1 << 5)
+ u_int16_t temperature; /* kelvin per the NVMe spec; unaligned (offset 1) */
+ u_int8_t avail_spare; /* percent */
+ u_int8_t avail_spare_threshold;
+ u_int8_t percent_used; /* percent of endurance used */
+ u_int8_t end_grp_summary; /* 1.4+ */
+
+ u_int8_t _reserved1[25]; /* pads the header to 32 bytes */
+
+ u_int64_t data_units_read[2]; /* each counter is two 64-bit words */
+ u_int64_t data_units_written[2];
+ u_int64_t host_read_commands[2];
+ u_int64_t host_write_commands[2];
+ u_int64_t busy_time[2];
+ u_int64_t power_cycles[2];
+ u_int64_t power_on_hours[2];
+ u_int64_t unsafe_shutdowns[2];
+ u_int64_t integrity_errors[2];
+ u_int64_t error_log_entries[2];
+ u_int32_t warn_temp_time; /* 1.2+ */
+ u_int32_t crit_temp_time; /* 1.2+ */
+ u_int16_t temp_sensors[8]; /* 1.2+ */
+ u_int32_t therm_mgmt_count_1; /* 1.3+ */
+ u_int32_t therm_mgmt_count_2; /* 1.3+ */
+ u_int32_t therm_mgmt_time_1; /* 1.3+ */
+ u_int32_t therm_mgmt_time_2; /* 1.3+ */
+
+ u_int8_t _reserved2[280]; /* pads the structure to 512 bytes */
+} __packed __aligned(8);
blob - 15137057818e377544126b710de0c09a2f6b6541
blob + 78f47ddd42f2e2b5166609b12160351a60f0c911
--- sys/dev/ic/nvmevar.h
+++ sys/dev/ic/nvmevar.h
@@ -126,6 +126,11 @@ struct nvme_softc {
struct scsi_iopool sc_iopool;
struct rwlock sc_lock;
struct scsibus_softc *sc_scsibus;
+
+ struct ksensordev sc_sensordev;
+ struct ksensor sc_temp_sensor;
+ struct ksensor sc_spare_sensor;
+ struct ksensor sc_usage_sensor;
};
#define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)
nvme(4) sensors