From: Tobias Heider Subject: scmi(4): add mbox transport and perf protocol To: tech@openbsd.org Cc: kettenis@openbsd.org, patrick@openbsd.org, landry@openbsd.org Date: Mon, 18 Nov 2024 01:01:11 +0100 Here is the promised diff to get scmi for cpu frequency management working on the Snapdragon X Elite. Roughly speaking this adds two big new features: mailbox transport and the scmi perf protocol. Mailbox transport is very similar to smc which we already support. Messages are written to a shared memory region; instead of an SMC call, we use a mailbox doorbell to notify the platform that our request is ready. On X Elites this is done using the CPUCP mailbox which we support via qccpucp(4). The attach function for the mbox transport is deferred to make sure our mailbox driver is available when we need it. There are a lot of possible optimizations here: instead of polling we could use mailbox interrupts, and for PERFORMANCE_LEVEL_GET we probably want to switch to using a fast channel at some point. The perf protocol allows us to read and set the performance level of our performance domains. On the X Elite we have 3 domains, each spanning 4 cpu cores. This patch adds sensors to expose the current frequency and power consumption for each domain. It is a bit pointless since the level doesn't really change currently. Eventually we probably want to hook this up to cpu(4) to expose it to apm(8). The output on my T14s looks like this: $ sysctl | grep scmi hw.sensors.scmi0.power0=0.22 W hw.sensors.scmi0.power1=0.29 W hw.sensors.scmi0.power2=0.31 W hw.sensors.scmi0.frequency0=2976000000.00 Hz hw.sensors.scmi0.frequency1=3417600000.00 Hz hw.sensors.scmi0.frequency2=3417600000.00 Hz Test feedback and reviews welcome. I don't have a machine using scmi-smc or the clock protocol so I'd appreciate it if someone could test those too. 
diff a921796a245d04e3f9e9aeb92b328e0f67f5f697 8fdeb116002858bf25bd6d5171e9e91c647099f2
commit - a921796a245d04e3f9e9aeb92b328e0f67f5f697
commit + 8fdeb116002858bf25bd6d5171e9e91c647099f2
blob - 5c567e567920508ecda4cc718168b85cc356898b
blob + ac6c78a53ee5f7369835f267aff83d11dc9fed02
--- sys/dev/fdt/scmi.c
+++ sys/dev/fdt/scmi.c
@@ -2,6 +2,7 @@
 
 /*
  * Copyright (c) 2023 Mark Kettenis
+ * Copyright (c) 2024 Tobias Heider
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -19,12 +20,16 @@
 #include
 #include
 #include
+#include
+#include
+#include
 
 #include
 #include
 
 #include
 #include
+#include
 #include
 
 #include
@@ -49,6 +54,7 @@ struct scmi_shmem {
 
 /* Protocols */
 #define SCMI_BASE			0x10
+#define SCMI_PERF			0x13
 #define SCMI_CLOCK			0x14
 
 /* Common messages */
@@ -64,6 +70,24 @@ struct scmi_shmem {
 #define SCMI_CLOCK_CONFIG_SET			0x7
 #define SCMI_CLOCK_CONFIG_SET_ENABLE		(1 << 0)
 
+/* Performance management messages */
+#define SCMI_PERF_DOMAIN_ATTRIBUTES		0x3
+#define SCMI_PERF_DESCRIBE_LEVELS		0x4
+#define SCMI_PERF_LEVEL_GET			0x8
+
+struct scmi_40_resp_perf_describe_levels {
+	uint16_t pl_nret;
+	uint16_t pl_nrem;
+	struct {
+		uint32_t	pe_perf;
+		uint32_t	pe_cost;
+		uint16_t	pe_latency;
+		uint16_t	pe_reserved;
+		uint32_t	pe_ifreq;
+		uint32_t	pe_lindex;
+	} pl_entry[];
+};
+
 static inline void
 scmi_message_header(volatile struct scmi_shmem *shmem,
     uint32_t protocol_id, uint32_t message_id)
@@ -71,20 +95,62 @@ scmi_message_header(volatile struct scmi_shmem *shmem,
 	shmem->message_header = (protocol_id << 10) | (message_id << 0);
 }
 
+struct scmi_ops {
+	int32_t		(*so_command)(void *);
+};
 
+struct scmi_perf_level {
+	uint32_t	pl_perf;
+	uint32_t	pl_cost;
+	uint32_t	pl_ifreq;
+};
+
+struct scmi_perf_domain {
+	size_t				pd_nlevels;
+	struct scmi_perf_level		*pd_levels;
+	int				pd_curlevel;
+};
+
 struct scmi_softc {
 	struct device		sc_dev;
 	bus_space_tag_t		sc_iot;
-	bus_space_handle_t	sc_ioh;
-	volatile struct scmi_shmem *sc_shmem;
+	int			sc_node;
 
+	bus_space_handle_t	sc_ioh_tx;
+	bus_space_handle_t	sc_ioh_rx;
+	volatile struct scmi_shmem	*sc_shmem_tx;
+	volatile struct scmi_shmem	*sc_shmem_rx;
+
 	uint32_t		sc_smc_id;
+	struct mbox_channel	*sc_mc_tx;
+	struct mbox_channel	*sc_mc_rx;
 
+	uint16_t		sc_ver_major;
+	uint16_t		sc_ver_minor;
+
+	/* SCMI_CLOCK */
 	struct clock_device	sc_cd;
+
+	/* SCMI_PERF */
+	int			sc_perf_power_unit;
+#define SCMI_POWER_UNIT_UW	0x2
+#define SCMI_POWER_UNIT_MW	0x1
+#define SCMI_POWER_UNIT_NONE	0x0
+	size_t			sc_perf_ndomains;
+	struct scmi_perf_domain	*sc_perf_domains;
+
+	struct ksensordev	sc_perf_sensordev;
+	struct ksensordev	sc_perf_psensordev;
+	struct ksensor		*sc_perf_fsensors;
+	struct ksensor		*sc_perf_psensors;
+
+	struct scmi_ops		sc_so;
 };
 
 int	scmi_match(struct device *, void *, void *);
 void	scmi_attach(struct device *, struct device *, void *);
+int	scmi_attach_smc(struct scmi_softc *, struct fdt_attach_args *);
+void	scmi_attach_mbox_deferred(struct device *);
 
 const struct cfattach scmi_ca = {
 	sizeof(struct scmi_softc), scmi_match, scmi_attach
@@ -96,22 +162,43 @@ struct cfdriver scmi_cd = {
 
 void	scmi_attach_proto(struct scmi_softc *, int);
 void	scmi_attach_clock(struct scmi_softc *, int);
-int32_t	scmi_command(struct scmi_softc *);
+void	scmi_attach_perf(struct scmi_softc *, int);
+int32_t	scmi_smc_command(void *);
+int32_t	scmi_mbox_command(void *);
+
 
 int
 scmi_match(struct device *parent, void *match, void *aux)
 {
 	struct fdt_attach_args *faa = aux;
 
-	return OF_is_compatible(faa->fa_node, "arm,scmi-smc");
+	return OF_is_compatible(faa->fa_node, "arm,scmi-smc") ||
+	    OF_is_compatible(faa->fa_node, "arm,scmi");
 }
 
 void
 scmi_attach(struct device *parent, struct device *self, void *aux)
 {
 	struct scmi_softc *sc = (struct scmi_softc *)self;
+	struct fdt_attach_args *faa = aux;
+
+	sc->sc_iot = faa->fa_iot;
+	sc->sc_node = faa->fa_node;
+
+	if (OF_is_compatible(faa->fa_node, "arm,scmi-smc")) {
+		scmi_attach_smc(sc, faa);
+	} else if (OF_is_compatible(faa->fa_node, "arm,scmi")) {
+		printf("\n");
+		/* Defer because we need the mailbox driver attached first */
+		config_defer(self, scmi_attach_mbox_deferred);
+	}
+}
+
+/* Attach over the SMC transport; returns 0 on success, -1 on failure. */
+int
+scmi_attach_smc(struct scmi_softc *sc, struct fdt_attach_args *faa)
+{
 	volatile struct scmi_shmem *shmem;
-	struct fdt_attach_args *faa = aux;
 	struct fdt_reg reg;
 	int32_t status;
 	uint32_t version;
@@ -119,53 +206,147 @@ scmi_attach(struct device *parent, struct device *self
 	void *node;
 	int proto;
 
-	phandle = OF_getpropint(faa->fa_node, "shmem", 0);
-	node = fdt_find_phandle(phandle);
-	if (node == NULL || !fdt_is_compatible(node, "arm,scmi-shmem") ||
-	    fdt_get_reg(node, 0, &reg)) {
-		printf(": no shared memory\n");
-		return;
-	}
-
 	sc->sc_smc_id = OF_getpropint(faa->fa_node, "arm,smc-id", 0);
 	if (sc->sc_smc_id == 0) {
 		printf(": no SMC id\n");
-		return;
+		return -1;
 	}
 
-	sc->sc_iot = faa->fa_iot;
+	phandle = OF_getpropint(faa->fa_node, "shmem", 0);
+	node = fdt_find_phandle(phandle);
+	if (node == NULL || !fdt_is_compatible(node, "arm,scmi-shmem") ||
+	    fdt_get_reg(node, 0, &reg)) {
+		printf(": no shared memory\n");
+		return -1;
+	}
+
 	if (bus_space_map(sc->sc_iot, reg.addr,
-	    reg.size, 0, &sc->sc_ioh)) {
+	    reg.size, 0, &sc->sc_ioh_tx)) {
 		printf(": can't map shared memory\n");
-		return;
+		return -1;
 	}
-	sc->sc_shmem = bus_space_vaddr(sc->sc_iot, sc->sc_ioh);
-	shmem = sc->sc_shmem;
+	sc->sc_shmem_tx = bus_space_vaddr(sc->sc_iot, sc->sc_ioh_tx);
+	shmem = sc->sc_shmem_tx;
 
+	sc->sc_so.so_command = scmi_smc_command;
+
 	if ((shmem->channel_status & SCMI_CHANNEL_FREE) == 0) {
 		printf(": channel busy\n");
-		return;
+		return -1;
 	}
 
 	scmi_message_header(shmem, SCMI_BASE, SCMI_PROTOCOL_VERSION);
 	shmem->length = sizeof(uint32_t);
-	status = scmi_command(sc);
+	status = sc->sc_so.so_command(sc);
 	if (status != SCMI_SUCCESS) {
 		printf(": protocol version command failed\n");
-		return;
+		return -1;
 	}
 
 	version = shmem->message_payload[1];
-	printf(": SCMI %d.%d\n", version >> 16, version & 0xffff);
+	sc->sc_ver_major = version >> 16;
+	sc->sc_ver_minor = version & 0xffff;
+	printf(": SCMI %d.%d\n", sc->sc_ver_major, sc->sc_ver_minor);
 
 	for (proto = OF_child(faa->fa_node); proto; proto = OF_peer(proto))
 		scmi_attach_proto(sc, proto);
+
+	return 0;
 }
 
+/*
+ * Attach over the mailbox transport.  Called through config_defer() so
+ * the mailbox driver has attached before the channels are looked up.
+ */
+void
+scmi_attach_mbox_deferred(struct device *self)
+{
+	struct scmi_softc *sc = (struct scmi_softc *)self;
+	uint32_t shmems[2];
+	int32_t status;
+	uint32_t version;
+	struct fdt_reg reg;
+	int len;
+	void *node;
+	int proto;
+
+	/* we only support the 2 mbox / 2 shmem case */
+	len = OF_getproplen(sc->sc_node, "mboxes");
+	if (len != 4 * sizeof(uint32_t)) {
+		printf("%s: invalid number of mboxes\n", sc->sc_dev.dv_xname);
+		return;
+	}
+
+	len = OF_getproplen(sc->sc_node, "shmem");
+	if (len != 2 * sizeof(uint32_t)) {
+		printf("%s: invalid number of shmems\n", sc->sc_dev.dv_xname);
+		return;
+	}
+
+	/* len was validated above: exactly two phandles, no allocation */
+	OF_getpropintarray(sc->sc_node, "shmem", shmems, sizeof(shmems));
+
+	sc->sc_mc_tx = mbox_channel(sc->sc_node, "tx", NULL);
+	if (sc->sc_mc_tx == NULL) {
+		printf("%s: no tx mbox\n", sc->sc_dev.dv_xname);
+		return;
+	}
+	sc->sc_mc_rx = mbox_channel(sc->sc_node, "rx", NULL);
+	if (sc->sc_mc_rx == NULL) {
+		printf("%s: no rx mbox\n", sc->sc_dev.dv_xname);
+		return;
+	}
+
+	node = fdt_find_phandle(shmems[0]);
+	if (node == NULL || !fdt_is_compatible(node, "arm,scmi-shmem") ||
+	    fdt_get_reg(node, 0, &reg)) {
+		printf("%s: no shared memory\n", sc->sc_dev.dv_xname);
+		return;
+	}
+	if (bus_space_map(sc->sc_iot, reg.addr, reg.size, 0, &sc->sc_ioh_tx)) {
+		printf("%s: can't map shared memory\n", sc->sc_dev.dv_xname);
+		return;
+	}
+	sc->sc_shmem_tx = bus_space_vaddr(sc->sc_iot, sc->sc_ioh_tx);
+
+	node = fdt_find_phandle(shmems[1]);
+	if (node == NULL || !fdt_is_compatible(node, "arm,scmi-shmem") ||
+	    fdt_get_reg(node, 0, &reg)) {
+		printf("%s: no shared memory\n", sc->sc_dev.dv_xname);
+		return;
+	}
+	if (bus_space_map(sc->sc_iot, reg.addr, reg.size, 0, &sc->sc_ioh_rx)) {
+		printf("%s: can't map shared memory\n", sc->sc_dev.dv_xname);
+		return;
+	}
+	sc->sc_shmem_rx = bus_space_vaddr(sc->sc_iot, sc->sc_ioh_rx);
+
+	sc->sc_so.so_command = scmi_mbox_command;
+
+	scmi_message_header(sc->sc_shmem_tx, SCMI_BASE, SCMI_PROTOCOL_VERSION);
+	sc->sc_shmem_tx->length = sizeof(uint32_t);
+	status = sc->sc_so.so_command(sc);
+	if (status != SCMI_SUCCESS) {
+		printf("%s: protocol version command failed\n",
+		    sc->sc_dev.dv_xname);
+		return;
+	}
+
+	version = sc->sc_shmem_tx->message_payload[1];
+	sc->sc_ver_major = version >> 16;
+	sc->sc_ver_minor = version & 0xffff;
+	printf("%s: SCMI %d.%d\n", sc->sc_dev.dv_xname, sc->sc_ver_major,
+	    sc->sc_ver_minor);
+
+	for (proto = OF_child(sc->sc_node); proto; proto = OF_peer(proto))
+		scmi_attach_proto(sc, proto);
+}
+
 int32_t
-scmi_command(struct scmi_softc *sc)
+scmi_smc_command(void *arg)
 {
-	volatile struct scmi_shmem *shmem = sc->sc_shmem;
+	struct scmi_softc *sc = arg;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
 	int32_t status;
 
 	shmem->channel_status = 0;
@@ -179,6 +360,37 @@ scmi_command(struct scmi_softc *sc)
 	return shmem->message_payload[0];
 }
 
+/*
+ * Ring the tx mailbox and poll the channel (20 x delay(10)) until it is
+ * free again; returns payload word 0 or a local SCMI_* error code.
+ */
+int32_t
+scmi_mbox_command(void *arg)
+{
+	struct scmi_softc *sc = arg;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
+	int ret;
+	int i;
+
+	shmem->channel_status = 0;
+	ret = mbox_send(sc->sc_mc_tx, NULL, 0);
+	if (ret != 0)
+		return SCMI_NOT_SUPPORTED;
+
+	/* XXX: poll for now */
+	for (i = 0; i < 20; i++) {
+		if (shmem->channel_status & SCMI_CHANNEL_FREE)
+			break;
+		delay(10);
+	}
+	if ((shmem->channel_status & SCMI_CHANNEL_ERROR))
+		return SCMI_COMMS_ERROR;
+	if ((shmem->channel_status & SCMI_CHANNEL_FREE) == 0)
+		return SCMI_BUSY;
+
+	return shmem->message_payload[0];
+}
+
 void
 scmi_attach_proto(struct scmi_softc *sc, int node)
 {
@@ -186,6 +398,9 @@ scmi_attach_proto(struct scmi_softc *sc, int node)
 	case SCMI_CLOCK:
 		scmi_attach_clock(sc, node);
 		break;
+	case SCMI_PERF:
+		scmi_attach_perf(sc, node);
+		break;
 	default:
 		break;
 	}
@@ -200,13 +415,13 @@ int	scmi_clock_set_frequency(void *, uint32_t *, uint3
 void
 scmi_attach_clock(struct scmi_softc *sc, int node)
 {
-	volatile struct scmi_shmem *shmem = sc->sc_shmem;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
 	int32_t status;
 	int nclocks;
 
 	scmi_message_header(shmem, SCMI_CLOCK, SCMI_PROTOCOL_ATTRIBUTES);
 	shmem->length = sizeof(uint32_t);
-	status = scmi_command(sc);
+	status = sc->sc_so.so_command(sc);
 	if (status != SCMI_SUCCESS)
 		return;
 
@@ -226,28 +441,28 @@ void
 scmi_clock_enable(void *cookie, uint32_t *cells, int on)
 {
 	struct scmi_softc *sc = cookie;
-	volatile struct scmi_shmem *shmem = sc->sc_shmem;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
 	uint32_t idx = cells[0];
 
 	scmi_message_header(shmem, SCMI_CLOCK, SCMI_CLOCK_CONFIG_SET);
 	shmem->length = 3 * sizeof(uint32_t);
 	shmem->message_payload[0] = idx;
 	shmem->message_payload[1] = on ? SCMI_CLOCK_CONFIG_SET_ENABLE : 0;
-	scmi_command(sc);
+	sc->sc_so.so_command(sc);
 }
 
 uint32_t
 scmi_clock_get_frequency(void *cookie, uint32_t *cells)
 {
 	struct scmi_softc *sc = cookie;
-	volatile struct scmi_shmem *shmem = sc->sc_shmem;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
 	uint32_t idx = cells[0];
 	int32_t status;
 
 	scmi_message_header(shmem, SCMI_CLOCK, SCMI_CLOCK_RATE_GET);
 	shmem->length = 2 * sizeof(uint32_t);
 	shmem->message_payload[0] = idx;
-	status = scmi_command(sc);
+	status = sc->sc_so.so_command(sc);
 	if (status != SCMI_SUCCESS)
 		return 0;
 	if (shmem->message_payload[2] != 0)
@@ -260,7 +475,7 @@ int
 scmi_clock_set_frequency(void *cookie, uint32_t *cells, uint32_t freq)
 {
 	struct scmi_softc *sc = cookie;
-	volatile struct scmi_shmem *shmem = sc->sc_shmem;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
 	uint32_t idx = cells[0];
 	int32_t status;
 
@@ -270,9 +485,207 @@ scmi_clock_set_frequency(void *cookie, uint32_t *cells
 	shmem->message_payload[1] = idx;
 	shmem->message_payload[2] = freq;
 	shmem->message_payload[3] = 0;
-	status = scmi_command(sc);
+	status = sc->sc_so.so_command(sc);
 	if (status != SCMI_SUCCESS)
 		return -1;
 
 	return 0;
 }
+
+/* Performance management */
+void	scmi_perf_descr_levels(struct scmi_softc *, int);
+void	scmi_perf_refresh_sensor(void *);
+
+/*
+ * Probe the performance protocol, record the levels of every domain and
+ * register one frequency and one power sensor per domain.
+ */
+void
+scmi_attach_perf(struct scmi_softc *sc, int node)
+{
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
+	int32_t status;
+	uint32_t version;
+	int i;
+
+	scmi_message_header(sc->sc_shmem_tx, SCMI_PERF, SCMI_PROTOCOL_VERSION);
+	sc->sc_shmem_tx->length = sizeof(uint32_t);
+	status = sc->sc_so.so_command(sc);
+	if (status != SCMI_SUCCESS) {
+		printf("%s: SCMI_PROTOCOL_VERSION failed\n",
+		    sc->sc_dev.dv_xname);
+		return;
+	}
+
+	version = shmem->message_payload[1];
+	if (version != 0x40000) {
+		printf("%s: invalid perf protocol version (0x%x != 0x40000)\n",
+		    sc->sc_dev.dv_xname, version);
+		return;
+	}
+
+	scmi_message_header(shmem, SCMI_PERF, SCMI_PROTOCOL_ATTRIBUTES);
+	shmem->length = sizeof(uint32_t);
+	status = sc->sc_so.so_command(sc);
+	if (status != SCMI_SUCCESS) {
+		printf("%s: SCMI_PROTOCOL_ATTRIBUTES failed\n",
+		    sc->sc_dev.dv_xname);
+		return;
+	}
+
+	sc->sc_perf_ndomains = shmem->message_payload[1] & 0xffff;
+	sc->sc_perf_domains = malloc(sc->sc_perf_ndomains *
+	    sizeof(struct scmi_perf_domain), M_DEVBUF, M_ZERO | M_WAITOK);
+	sc->sc_perf_power_unit = (shmem->message_payload[1] >> 16) & 0x3;
+
+	strlcpy(sc->sc_perf_sensordev.xname, sc->sc_dev.dv_xname,
+	    sizeof(sc->sc_perf_sensordev.xname));
+
+	sc->sc_perf_fsensors =
+	    malloc(sc->sc_perf_ndomains * sizeof(struct ksensor),
+	    M_DEVBUF, M_ZERO | M_WAITOK);
+	sc->sc_perf_psensors =
+	    malloc(sc->sc_perf_ndomains * sizeof(struct ksensor),
+	    M_DEVBUF, M_ZERO | M_WAITOK);
+
+	/* Add one frequency sensor per perf domain */
+	for (i = 0; i < sc->sc_perf_ndomains; i++) {
+		scmi_message_header(shmem, SCMI_PERF,
+		    SCMI_PERF_DOMAIN_ATTRIBUTES);
+		shmem->length = 2 * sizeof(uint32_t);
+		shmem->message_payload[0] = i;
+		status = sc->sc_so.so_command(sc);
+		if (status != SCMI_SUCCESS) {
+			printf("%s: SCMI_PERF_DOMAIN_ATTRIBUTES failed\n",
+			    sc->sc_dev.dv_xname);
+			goto err;
+		}
+
+		scmi_perf_descr_levels(sc, i);
+
+		sc->sc_perf_fsensors[i].type = SENSOR_FREQ;
+		sensor_attach(&sc->sc_perf_sensordev, &sc->sc_perf_fsensors[i]);
+		sc->sc_perf_psensors[i].type = SENSOR_WATTS;
+		sensor_attach(&sc->sc_perf_sensordev, &sc->sc_perf_psensors[i]);
+	}
+	sensordev_install(&sc->sc_perf_sensordev);
+	sensor_task_register(sc, scmi_perf_refresh_sensor, 1);
+	return;
+err:
+	free(sc->sc_perf_fsensors, M_DEVBUF,
+	    sc->sc_perf_ndomains * sizeof(struct ksensor));
+	free(sc->sc_perf_psensors, M_DEVBUF,
+	    sc->sc_perf_ndomains * sizeof(struct ksensor));
+}
+
+/*
+ * Read the performance levels of one domain into pd_levels, repeating
+ * the request while the firmware reports remaining entries.
+ */
+void
+scmi_perf_descr_levels(struct scmi_softc *sc, int domain)
+{
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
+	volatile struct scmi_40_resp_perf_describe_levels *pl;
+	struct scmi_perf_domain *pd = &sc->sc_perf_domains[domain];
+	int status, i, idx;
+
+	idx = 0;
+	do {
+		scmi_message_header(shmem, SCMI_PERF,
+		    SCMI_PERF_DESCRIBE_LEVELS);
+		shmem->length = sizeof(uint32_t) * 3;
+		shmem->message_payload[0] = domain;
+		shmem->message_payload[1] = idx;
+		status = sc->sc_so.so_command(sc);
+		if (status != SCMI_SUCCESS) {
+			printf("%s: SCMI_PERF_DESCRIBE_LEVELS failed\n",
+			    sc->sc_dev.dv_xname);
+			return;
+		}
+
+		pl = (volatile struct scmi_40_resp_perf_describe_levels *)
+		    &shmem->message_payload[1];
+
+		if (pd->pd_levels == NULL) {
+			pd->pd_nlevels = pl->pl_nret + pl->pl_nrem;
+			pd->pd_levels = malloc(pd->pd_nlevels *
+			    sizeof(struct scmi_perf_level),
+			    M_DEVBUF, M_ZERO | M_WAITOK);
+		}
+
+		/* Stop on no progress or more levels than were announced */
+		if (pl->pl_nret == 0 || idx + pl->pl_nret > pd->pd_nlevels)
+			break;
+
+		for (i = 0; i < pl->pl_nret; i++) {
+			pd->pd_levels[idx + i].pl_cost =
+			    pl->pl_entry[i].pe_cost;
+			pd->pd_levels[idx + i].pl_perf =
+			    pl->pl_entry[i].pe_perf;
+			pd->pd_levels[idx + i].pl_ifreq =
+			    pl->pl_entry[i].pe_ifreq;
+		}
+		idx += pl->pl_nret;
+	} while (pl->pl_nrem);
+}
+
+/*
+ * Sensor task: read the current level of every domain and publish the
+ * corresponding frequency and power cost.
+ */
+void
+scmi_perf_refresh_sensor(void *arg)
+{
+	struct scmi_softc *sc = arg;
+	volatile struct scmi_shmem *shmem = sc->sc_shmem_tx;
+	uint64_t power_cost;
+	int32_t status;
+	int level, i;
+
+	if (sc->sc_perf_domains == NULL)
+		return;
+
+	for (i = 0; i < sc->sc_perf_ndomains; i++) {
+		if (sc->sc_perf_domains[i].pd_levels == NULL)
+			return;
+
+		scmi_message_header(shmem, SCMI_PERF,
+		    SCMI_PERF_LEVEL_GET);
+		shmem->length = sizeof(uint32_t) * 2;
+		shmem->message_payload[0] = i;
+		status = sc->sc_so.so_command(sc);
+		if (status != SCMI_SUCCESS) {
+			printf("%s: SCMI_PERF_LEVEL_GET failed\n",
+			    sc->sc_dev.dv_xname);
+			return;
+		}
+
+		level = shmem->message_payload[1];
+		if (sc->sc_perf_fsensors == NULL ||
+		    sc->sc_perf_psensors == NULL)
+			return;
+
+		/* level comes from firmware; don't index past the table */
+		if (level < 0 ||
+		    (size_t)level >= sc->sc_perf_domains[i].pd_nlevels)
+			continue;
+		sc->sc_perf_domains[i].pd_curlevel = level;
+		sc->sc_perf_fsensors[i].value =
+		    (uint64_t)sc->sc_perf_domains[i].
+		    pd_levels[level].pl_ifreq * 1000000000;
+
+		switch (sc->sc_perf_power_unit) {
+		case SCMI_POWER_UNIT_UW:
+			power_cost = (uint64_t)sc->sc_perf_domains[i].
+			    pd_levels[level].pl_cost;
+			break;
+		case SCMI_POWER_UNIT_MW:
+			power_cost = (uint64_t)sc->sc_perf_domains[i].
+			    pd_levels[level].pl_cost * 1000;
+			break;
+		default:
+			continue;
+		}
+		sc->sc_perf_psensors[i].value = power_cost;
+	}
+}