From: Miguel Landaeta Subject: [4/5] vmd: generate and expose ACPI tables to the guest via fw_cfg To: tech@openbsd.org Date: Tue, 9 Jun 2026 13:27:45 +0000 This is a series of commits to expose ACPI tables to vmd guests. These were committed and tested individually. They can be reviewed in their entirety here: https://github.com/openbsd/src/compare/master...nomadium:src:add-support-for-acpi-in-vmd.patch Generate a minimal ACPI table set (RSDP, XSDT, MADT, FADT, DSDT) and expose it to SeaBIOS guests via the QEMU fw_cfg table-loader protocol (etc/acpi/rsdp, etc/acpi/tables, and etc/table-loader fw_cfg files). --- usr.sbin/vmd/Makefile | 1 + usr.sbin/vmd/acpi.c | 567 ++++++++++++++++++++++++++++++++++++ usr.sbin/vmd/acpi.h | 228 +++++++++++++++ usr.sbin/vmd/fw_cfg.c | 3 + usr.sbin/vmd/fw_cfg.h | 4 + usr.sbin/vmd/loadfile_elf.c | 3 + 6 files changed, 806 insertions(+) create mode 100644 usr.sbin/vmd/acpi.c create mode 100644 usr.sbin/vmd/acpi.h diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile index bc7a159defc..153fb5f93c7 100644 --- a/usr.sbin/vmd/Makefile +++ b/usr.sbin/vmd/Makefile @@ -9,6 +9,7 @@ SRCS+= vioscsi.c vioraw.c vioqcow2.c vm_agentx.c vioblk.c SRCS+= vionet.c .if ${MACHINE} == "amd64" +SRCS+= acpi.c SRCS+= i8253.c i8259.c fw_cfg.c loadfile_elf.c mc146818.c ns8250.c SRCS+= x86_vm.c x86_mmio.c SRCS+= psp.c sev.c diff --git a/usr.sbin/vmd/acpi.c b/usr.sbin/vmd/acpi.c new file mode 100644 index 00000000000..6fed77140ff --- /dev/null +++ b/usr.sbin/vmd/acpi.c @@ -0,0 +1,567 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2026 Miguel Landaeta + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include +#include +#include + +#include "fw_cfg.h" +#include "vmd.h" +#include "vmm.h" +#include "acpi.h" + +/* + * Minimal ACPI table generation for vmd guests. + * + * Places a chain of ACPI tables in the BIOS-reserved area + * (0xe0000-0xfffff) so the guest kernel can find and validate them + * via its standard RSDP scan. + * + * Tables written: + * RSDP @ ACPI_RSDP_BASE Root System Description Pointer (ACPI 2.0) + * XSDT @ ACPI_XSDT_BASE Extended System Description Table + * FADT @ ACPI_FADT_BASE Fixed ACPI Description Table (rev 3) + * FACS @ ACPI_FACS_BASE Firmware ACPI Control Structure + * DSDT @ ACPI_DSDT_BASE Differentiated System Description Table (empty) + * MADT @ ACPI_MADT_BASE Multiple APIC Description Table + * + * The MADT contains one Local APIC entry per configured VCPU but no + * I/O APIC entry. With MADT_PCAT_COMPAT flag set, the guest uses the + * emulated i8259 PIC for interrupt routing. An I/O APIC entry is not added + * yet, because it would cause ioapic_attach() to succeed and acpimadt_attach() + * would switch mp_busses to APIC-mode routing, both of which break vmd's + * PIC-based interrupt delivery. + * + * vmm does not expose CPUIDECX_X2APIC in the guest CPUID leaf, so the + * guest uses xAPIC MMIO at 0xfee00000, which falls inside vmd's PCI MMIO + * BAR range and is backed by ordinary guest RAM. LAPIC accesses are + * silently absorbed; interrupt delivery uses vmm's VMX injection and does + * not depend on the guest LAPIC register state. 
+ */ + +#define RSDP_SIG "RSD PTR " +#define VMD_ACPI_OEM_ID "OpenBS" +#define VMD_ACPI_TABLE_ID "OpenBSVM" +#define VMD_ACPI_CREATOR_ID "OVMD" + +#define ACPI_REV_1 1 +#define ACPI_REV_2 2 +#define ACPI_REV_3 3 + + +static uint8_t +acpi_checksum(const void *buf, size_t len) +{ + const uint8_t *p = buf; + uint8_t sum = 0; + + while (len--) + sum += *p++; + return (-sum); +} + +/* + * Fill the standard 36-byte ACPI SDT header. + * The caller must zero the table first and compute+store the checksum + * last, after all other fields are set. + */ +static void +fill_hdr(struct acpi_table_hdr *hdr, const char sig[4], + uint32_t length, uint8_t revision) +{ + memcpy(hdr->th_signature, sig, sizeof(hdr->th_signature)); + hdr->th_length = length; + hdr->th_revision = revision; + hdr->th_checksum = 0; + memcpy(hdr->th_oemid, VMD_ACPI_OEM_ID, sizeof(hdr->th_oemid)); + memcpy(hdr->th_oem_tableid, VMD_ACPI_TABLE_ID, sizeof(hdr->th_oem_tableid)); + hdr->th_oem_revision = ACPI_REV_1; + memcpy(hdr->th_creator_id, VMD_ACPI_CREATOR_ID, sizeof(hdr->th_creator_id)); + hdr->th_creator_revision = ACPI_REV_1; +} + +static void +acpi_write_rsdp(void) +{ + struct acpi_rsdp rsdp = {0}; + + memcpy(rsdp.rsdp_signature, RSDP_SIG, sizeof(rsdp.rsdp_signature)); + memcpy(rsdp.rsdp_oemid, VMD_ACPI_OEM_ID, sizeof(rsdp.rsdp_oemid)); + rsdp.rsdp_revision = ACPI_REV_2; + rsdp.rsdp_length = sizeof(rsdp); + rsdp.rsdp_xsdt = ACPI_XSDT_BASE; + + /* ACPI 1.0 checksum covers the first 20 bytes. */ + rsdp.rsdp_checksum = acpi_checksum(&rsdp, 20); + /* ACPI 2.0 extended checksum covers the full 36 bytes. 
*/ + rsdp.rsdp_ext_checksum = acpi_checksum(&rsdp, sizeof(rsdp)); + + if (write_mem(ACPI_RSDP_BASE, &rsdp, sizeof(rsdp))) + fatalx("%s: failed to write RSDP", __func__); +} + +static void +acpi_write_xsdt(void) +{ + struct acpi_xsdt xsdt = {0}; + size_t xsdt_sz; + + /* th_length and write_mem must use the actual populated size, not + * sizeof(xsdt): the struct holds ACPI_XSDT_MAX_TABLES slots but only + * the first two are used here. Guests compute the entry count as + * (th_length - sizeof(header)) / 8, so trailing zero slots would be + * treated as valid pointers to physical address 0. + */ + xsdt_sz = sizeof(xsdt.x_hdr) + 2 * sizeof(xsdt.x_tables[0]); + + fill_hdr(&xsdt.x_hdr, "XSDT", xsdt_sz, ACPI_REV_1); + xsdt.x_tables[0] = ACPI_FADT_BASE; + xsdt.x_tables[1] = ACPI_MADT_BASE; + xsdt.x_hdr.th_checksum = acpi_checksum(&xsdt, xsdt_sz); + + if (write_mem(ACPI_XSDT_BASE, &xsdt, xsdt_sz)) + fatalx("%s: failed to write XSDT", __func__); +} + +static void +acpi_write_facs(void) +{ + struct acpi_facs facs = {0}; + + memcpy(facs.facs_signature, "FACS", sizeof(facs.facs_signature)); + facs.facs_length = sizeof(facs); + facs.facs_version = ACPI_REV_2; + /* FACS has no checksum field. */ + + if (write_mem(ACPI_FACS_BASE, &facs, sizeof(facs))) + fatalx("%s: failed to write FACS", __func__); +} + +static void +acpi_write_dsdt(void) +{ + struct acpi_dsdt dsdt = {0}; + + fill_hdr(&dsdt.d_hdr, "DSDT", sizeof(dsdt), ACPI_REV_2); + dsdt.d_hdr.th_checksum = acpi_checksum(&dsdt, sizeof(dsdt)); + + if (write_mem(ACPI_DSDT_BASE, &dsdt, sizeof(dsdt))) + fatalx("%s: failed to write DSDT", __func__); +} + +static void +acpi_write_fadt(void) +{ + struct acpi_fadt fadt = {0}; + + /* not a typo: "FACP" (Fixed ACPI Control and Power) is the spec-mandated signature */ + fill_hdr(&fadt.f_hdr, "FACP", sizeof(fadt), ACPI_REV_3); + + /* + * Both the 32-bit and 64-bit FACS/DSDT pointers are set. + * Kernels using ACPI 2.0 prefer the 64-bit (x_) variants. 
+ */ + fadt.f_firmware_ctrl = ACPI_FACS_BASE; + fadt.f_dsdt = ACPI_DSDT_BASE; + fadt.f_pm_profile = 1; /* Desktop */ + fadt.f_sci_int = 9; /* SCI on ISA IRQ 9 */ + /* f_smi_cmd = 0: no SMI port; ACPI is hardware-always-on. */ + fadt.f_flags = FADT_WBINVD | FADT_PROC_C1; + fadt.f_iapc_boot_arch = FADT_LEGACY_DEVICES | FADT_KBD_CTRL; + fadt.f_x_firmware_ctrl = ACPI_FACS_BASE; + fadt.f_x_dsdt = ACPI_DSDT_BASE; + + fadt.f_hdr.th_checksum = acpi_checksum(&fadt, sizeof(fadt)); + + if (write_mem(ACPI_FADT_BASE, &fadt, sizeof(fadt))) + fatalx("%s: failed to write FADT", __func__); +} + +static void +acpi_write_madt(size_t ncpus) +{ + /* + * Build the MADT into a local buffer so we can compute one + * checksum over the complete variable-length table. + * + * Maximum size: header (44) + VMM_MAX_VCPUS_PER_VM * LAPIC entry (8). + */ + size_t buf_len = sizeof(struct acpi_madt_hdr) + + VMM_MAX_VCPUS_PER_VM * sizeof(struct acpi_madt_lapic); + uint8_t buf[buf_len]; + struct acpi_madt_hdr *madt; + struct acpi_madt_lapic *lapic; + uint32_t madt_len; + size_t i, off; + + madt_len = sizeof(struct acpi_madt_hdr) + + ncpus * sizeof(struct acpi_madt_lapic); + + memset(buf, 0, sizeof(buf)); + madt = (struct acpi_madt_hdr *)buf; + fill_hdr(&madt->m_hdr, "APIC", madt_len, ACPI_REV_1); + madt->m_lapic_addr = LAPIC_BASE; + /* + * MADT_PCAT_COMPAT: signals that the i8259 PIC is present and + * initialised by firmware. The guest uses the PIC for interrupt + * routing, which vmd already emulates. No I/O APIC entry is + * included; adding one would cause the guest to access IOAPIC MMIO + * at 0xfec00000, which vmd does not yet handle. 
+ */ + madt->m_flags = MADT_PCAT_COMPAT; + + off = sizeof(struct acpi_madt_hdr); + for (i = 0; i < ncpus; i++) { + lapic = (struct acpi_madt_lapic *)(buf + off); + lapic->ml_type = MADT_TYPE_LAPIC; + lapic->ml_length = sizeof(struct acpi_madt_lapic); + lapic->ml_proc_id = (uint8_t)i; + lapic->ml_apic_id = (uint8_t)i; + lapic->ml_flags = LAPIC_ENABLED; + off += sizeof(struct acpi_madt_lapic); + } + + madt->m_hdr.th_checksum = acpi_checksum(buf, madt_len); + + if (write_mem(ACPI_MADT_BASE, buf, madt_len)) + fatalx("%s: failed to write MADT", __func__); +} + +/* QEMU/SeaBIOS table-loader protocol */ +/* + * SeaBIOS reads ACPI tables from three fw_cfg files: + * + * etc/table-loader ordered list of 128-byte command entries + * etc/acpi/tables contiguous blob: XSDT, FADT, MADT, DSDT, etc + * etc/acpi/rsdp the 36-byte RSDP + * + * The tables blob is built with RELATIVE offsets in all physical-address + * fields. At runtime SeaBIOS allocates the blob in the FSEG zone, then runs + * the ADD_POINTER commands which ADD the blob's allocated base address to + * each relative offset, converting them to absolute physical addresses. + * ADD_CHECKSUM commands then recompute ACPI table checksums over the + * patched data. 
+ * + * This is the mechanism used by QEMU, described in: + * https://github.com/qemu/qemu/blob/v11.0.0/hw/acpi/bios-linker-loader.c + */ + +#define LOADER_FILESZ 56 /* max filename length including NUL */ +#define LOADER_ENTRY_SIZE 128 /* every command entry is exactly this */ + +#define LOADER_ALLOCATE 1 +#define LOADER_ADD_POINTER 2 +#define LOADER_ADD_CHECKSUM 3 + +#define LOADER_ZONE_HIGH 1 /* allocate in RAM above 1 MB */ +#define LOADER_ZONE_FSEG 2 /* allocate in 0xe0000-0xfffff */ + +struct loader_entry { + uint32_t command; + uint8_t data[124]; /* layout determined by command, see helpers below */ +} __packed; + +_Static_assert(sizeof(struct loader_entry) == LOADER_ENTRY_SIZE, + "loader_entry must be exactly 128 bytes"); + +/* + * fw_cfg table-loader helpers + */ + +/* LOADER_ALLOCATE: allocate 'file' aligned to 'align' in 'zone'. */ +static void +loader_add_alloc(struct loader_entry *e, const char *file, + uint32_t align, uint8_t zone) +{ + memset(e, 0, sizeof(*e)); + e->command = LOADER_ALLOCATE; + strlcpy((char *)e->data, file, LOADER_FILESZ); + memcpy(e->data + LOADER_FILESZ, &align, 4); + e->data[LOADER_FILESZ + 4] = zone; +} + +/* + * LOADER_ADD_POINTER: at byte 'offset' in 'dest', add the allocated base + * address of 'src' to the existing 'size'-byte little-endian value. + * + * Because the initial value in the tables blob is the relative offset of + * the pointed-to table from the blob start, adding the blob base yields + * the correct absolute physical address. + */ +static void +loader_add_pointer(struct loader_entry *e, const char *dest, const char *src, + uint32_t offset, uint8_t size) +{ + memset(e, 0, sizeof(*e)); + e->command = LOADER_ADD_POINTER; + strlcpy((char *)e->data, dest, LOADER_FILESZ); + strlcpy((char *)e->data + LOADER_FILESZ, src, LOADER_FILESZ); + memcpy(e->data + 2 * LOADER_FILESZ, &offset, 4); + e->data[2 * LOADER_FILESZ + 4] = size; +} + +/* + * LOADER_ADD_CHECKSUM: set the byte at 'offset' in 'file' so that the + * arithmetic sum of bytes [start, start+length) is zero mod 256. 
+ */ +static void +loader_add_checksum(struct loader_entry *e, const char *file, + uint32_t offset, uint32_t start, uint32_t length) +{ + memset(e, 0, sizeof(*e)); + e->command = LOADER_ADD_CHECKSUM; + strlcpy((char *)e->data, file, LOADER_FILESZ); + memcpy(e->data + LOADER_FILESZ, &offset, 4); + memcpy(e->data + LOADER_FILESZ + 4, &start, 4); + memcpy(e->data + LOADER_FILESZ + 8, &length, 4); +} + + +/* Build the etc/acpi/tables blob: one contiguous buffer containing XSDT, + * FADT, MADT and DSDT packed sequentially. Fills *l with the size and + * byte offset of each table within the buffer. + * + * All physical-address fields (XSDT entries, FADT DSDT pointers) are + * initialised with their target table's offset from the blob start. + * ADD_POINTER commands in the loader convert these to absolute addresses + * at run time. All th_checksum fields except DSDT's are zeroed here; + * ADD_CHECKSUM commands recompute them after pointer patching. DSDT has + * no pointer fields so its checksum is stable and computed immediately. + * + * Returns a heap-allocated buffer the caller must free(). + */ +static uint8_t * +acpi_build_tables_blob(size_t ncpus, struct acpi_blob_layout *l) +{ + struct acpi_xsdt *xsdt; + struct acpi_fadt *fadt; + struct acpi_madt_hdr *madt; + struct acpi_madt_lapic *lapic; + struct acpi_dsdt *dsdt; + uint8_t *tables; + size_t i; + + /* Sizes: XSDT accounts only for the two populated table pointers. */ + l->xsdt_sz = sizeof(struct acpi_table_hdr) + 2 * sizeof(uint64_t); + l->fadt_sz = sizeof(struct acpi_fadt); + l->madt_sz = sizeof(struct acpi_madt_hdr) + + ncpus * sizeof(struct acpi_madt_lapic); + l->dsdt_sz = sizeof(struct acpi_dsdt); + + /* Offsets: tables are packed sequentially in the blob. 
*/ + l->xsdt_off = 0; + l->fadt_off = l->xsdt_off + l->xsdt_sz; + l->madt_off = l->fadt_off + l->fadt_sz; + l->dsdt_off = l->madt_off + l->madt_sz; + l->total_sz = l->dsdt_off + l->dsdt_sz; + + tables = calloc(1, l->total_sz); + if (tables == NULL) + fatal("%s: calloc", __func__); + + /* XSDT: entries hold relative offsets; ADD_POINTER makes them absolute. */ + xsdt = (struct acpi_xsdt *)(tables + l->xsdt_off); + fill_hdr(&xsdt->x_hdr, "XSDT", l->xsdt_sz, ACPI_REV_1); + xsdt->x_tables[0] = l->fadt_off; + xsdt->x_tables[1] = l->madt_off; + + /* FADT: DSDT pointers hold relative offsets; patched by ADD_POINTER. */ + fadt = (struct acpi_fadt *)(tables + l->fadt_off); + /* not a typo: "FACP" (Fixed ACPI Control and Power) is the spec-mandated signature */ + fill_hdr(&fadt->f_hdr, "FACP", l->fadt_sz, ACPI_REV_3); + + fadt->f_firmware_ctrl = 0; /* No FACS; S3 not supported */ + fadt->f_dsdt = l->dsdt_off; /* relative; patched by ADD_POINTER */ + fadt->f_pm_profile = 1; /* Desktop */ + fadt->f_sci_int = 9; + fadt->f_flags = FADT_WBINVD | FADT_PROC_C1; + fadt->f_iapc_boot_arch = FADT_LEGACY_DEVICES | FADT_KBD_CTRL; + fadt->f_x_firmware_ctrl = 0; /* No FACS. */ + fadt->f_x_dsdt = l->dsdt_off; /* relative; patched by ADD_POINTER */ + + /* MADT: one Local APIC per VCPU */ + madt = (struct acpi_madt_hdr *)(tables + l->madt_off); + lapic = (struct acpi_madt_lapic *)(tables + l->madt_off + sizeof(*madt)); + fill_hdr(&madt->m_hdr, "APIC", l->madt_sz, ACPI_REV_1); + madt->m_lapic_addr = LAPIC_BASE; + madt->m_flags = MADT_PCAT_COMPAT; + for (i = 0; i < ncpus; i++) { + lapic[i].ml_type = MADT_TYPE_LAPIC; + lapic[i].ml_length = sizeof(struct acpi_madt_lapic); + lapic[i].ml_proc_id = (uint8_t)i; + lapic[i].ml_apic_id = (uint8_t)i; + lapic[i].ml_flags = LAPIC_ENABLED; + } + + /* DSDT: no address fields, checksum is final now. 
*/ + dsdt = (struct acpi_dsdt *)(tables + l->dsdt_off); + fill_hdr(&dsdt->d_hdr, "DSDT", l->dsdt_sz, ACPI_REV_2); + dsdt->d_hdr.th_checksum = acpi_checksum(dsdt, l->dsdt_sz); + + return tables; +} + +/* Build the etc/table-loader command sequence that tells SeaBIOS how to + * allocate, patch and checksum the tables blob and RSDP blob. + * + * Command ordering is protocol-mandated: ADD_POINTER commands must + * precede ADD_CHECKSUM commands for the regions they affect, so that + * checksums are computed over already-patched data. + * + * Sets *ncmds to the number of entries written. + * Returns a heap-allocated array the caller must free(). + */ +static struct loader_entry * +acpi_build_loader(const struct acpi_blob_layout *l, int *ncmds) +{ + struct loader_entry *loader; + int n, maxcmds; + + /* + * 2 ALLOC (tables blob, RSDP blob) + * 2 ADD_POINTER (XSDT entries: FADT, MADT) + * 2 ADD_POINTER (FADT DSDT: 32-bit f_dsdt, 64-bit f_x_dsdt) + * 3 ADD_CHECKSUM (XSDT, FADT, MADT - after all pointers patched) + * 1 ADD_POINTER (RSDP rsdp_xsdt -> XSDT) + * 2 ADD_CHECKSUM (RSDP: ACPI 1.0 first-20, ACPI 2.0 all-36) + */ + maxcmds = 2 + 2 + 2 + 3 + 1 + 2; + loader = calloc(maxcmds, sizeof(*loader)); + if (loader == NULL) + fatal("%s: calloc", __func__); + n = 0; + + /* + * Use LOADER_ZONE_FSEG (0xe0000-0xfffff) rather than LOADER_ZONE_HIGH. + * ZONE_HIGH places the blob at the top of RAM, but SeaBIOS's + * table-loader success path returns early before acpi_setup() runs, so + * the e820 reservation for that region is never written. The guest OS + * then treats those pages as free RAM, overwrites the tables, and + * crashes when ACPI pointers are dereferenced. FSEG is always marked + * reserved; the OS never allocates from there. + */ + loader_add_alloc(&loader[n++], FW_CFG_ACPI_TABLES, 64, LOADER_ZONE_FSEG); + loader_add_alloc(&loader[n++], FW_CFG_ACPI_RSDP, 16, LOADER_ZONE_FSEG); + + /* Patch XSDT entries (relative offsets -> absolute addresses). 
*/ + loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES, + l->xsdt_off + offsetof(struct acpi_xsdt, x_tables[0]), 8); + loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES, + l->xsdt_off + offsetof(struct acpi_xsdt, x_tables[1]), 8); + + /* Patch FADT's DSDT pointer in both 32-bit and 64-bit fields. */ + loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES, + l->fadt_off + offsetof(struct acpi_fadt, f_dsdt), 4); + loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES, + l->fadt_off + offsetof(struct acpi_fadt, f_x_dsdt), 8); + + /* Recompute table checksums after all pointer patching. */ + loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES, + l->xsdt_off + offsetof(struct acpi_table_hdr, th_checksum), + l->xsdt_off, l->xsdt_sz); + loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES, + l->fadt_off + offsetof(struct acpi_table_hdr, th_checksum), + l->fadt_off, l->fadt_sz); + loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES, + l->madt_off + offsetof(struct acpi_table_hdr, th_checksum), + l->madt_off, l->madt_sz); + + /* Patch RSDP's XSDT address (XSDT is at offset 0 of the blob). */ + loader_add_pointer(&loader[n++], FW_CFG_ACPI_RSDP, FW_CFG_ACPI_TABLES, + offsetof(struct acpi_rsdp, rsdp_xsdt), 8); + + /* Recompute RSDP checksums after the XSDT address is written. */ + loader_add_checksum(&loader[n++], FW_CFG_ACPI_RSDP, + offsetof(struct acpi_rsdp, rsdp_checksum), + 0, 20); /* ACPI 1.0: first 20 bytes */ + loader_add_checksum(&loader[n++], FW_CFG_ACPI_RSDP, + offsetof(struct acpi_rsdp, rsdp_ext_checksum), + 0, sizeof(struct acpi_rsdp)); /* ACPI 2.0: all 36 bytes */ + + if (n != maxcmds) + fatalx("%s: loader command count mismatch", __func__); + + *ncmds = n; + return loader; +} + +/* + * acpi_register_fw_cfg - expose ACPI tables to SeaBIOS via fw_cfg. 
+ * + * SeaBIOS manages the 0xe0000-0xfffff region as its own "high table" + * zone and overwrites anything vmd wrote there during POST. The correct + * handoff is the QEMU/SeaBIOS table-loader protocol: vmd registers three + * fw_cfg files and SeaBIOS allocates, patches, and checksums the tables + * at run time inside its own ACPI initialisation sequence. + */ +void +acpi_register_fw_cfg(size_t ncpus) +{ + struct acpi_blob_layout l; + struct acpi_rsdp rsdp = {0}; + uint8_t *tables; + struct loader_entry *loader; + int ncmds; + + tables = acpi_build_tables_blob(ncpus, &l); + + /* + * rsdp_xsdt = 0: XSDT is at offset 0 of the tables blob. + * ADD_POINTER converts it to the absolute physical address at run time. + * Both checksum fields are zeroed here; ADD_CHECKSUM fills them in. + */ + memcpy(rsdp.rsdp_signature, RSDP_SIG, sizeof(rsdp.rsdp_signature)); + memcpy(rsdp.rsdp_oemid, VMD_ACPI_OEM_ID, sizeof(rsdp.rsdp_oemid)); + rsdp.rsdp_revision = ACPI_REV_2; + rsdp.rsdp_length = sizeof(rsdp); + + loader = acpi_build_loader(&l, &ncmds); + + fw_cfg_add_file(FW_CFG_TABLE_LOADER, loader, (size_t)ncmds * sizeof(*loader)); + fw_cfg_add_file(FW_CFG_ACPI_TABLES, tables, l.total_sz); + fw_cfg_add_file(FW_CFG_ACPI_RSDP, &rsdp, sizeof(rsdp)); + + log_debug("%s: ACPI tables written (%zu VCPU%s)", __func__, + ncpus, ncpus == 1 ? "" : "s"); + + free(tables); + free(loader); +} + +/* + * acpi_write_tables - write ACPI tables into the BIOS scan window. + * + * Used by the direct kernel boot path (-b): places tables at 0xe0000 so + * the kernel's acpi_probe() finds them. No firmware runs to overwrite + * this region. + */ +void +acpi_write_tables(size_t ncpus) +{ + acpi_write_rsdp(); + acpi_write_xsdt(); + acpi_write_facs(); + acpi_write_dsdt(); + acpi_write_fadt(); + acpi_write_madt(ncpus); + + log_debug("%s: ACPI tables written (%zu VCPU%s)", __func__, + ncpus, ncpus == 1 ? 
"" : "s"); +} diff --git a/usr.sbin/vmd/acpi.h b/usr.sbin/vmd/acpi.h new file mode 100644 index 00000000000..d05c23da5ef --- /dev/null +++ b/usr.sbin/vmd/acpi.h @@ -0,0 +1,228 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2005 Thorsten Lockert + * Copyright (c) 2005 Marco Peereboom + * Copyright (c) 2026 Miguel Landaeta + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _ACPI_H_ +#define _ACPI_H_ + +#include "vmd.h" + +/* + * Physical base addresses for each table, all within the BIOS-reserved + * region 0x90000-0xfffff (VM_MEM_RESERVED, accessible via write_mem). + * The RSDP must be 16-byte aligned and within 0xe0000-0xfffff. 
+ */ +#define ACPI_RSDP_BASE 0xe0000ULL +#define ACPI_XSDT_BASE 0xe0040ULL +#define ACPI_FADT_BASE 0xe0080ULL +#define ACPI_FACS_BASE 0xe0180ULL +#define ACPI_DSDT_BASE 0xe01c0ULL +#define ACPI_MADT_BASE 0xe0200ULL + +#define LAPIC_BASE 0xfee00000UL + +/* FADT flags (ACPI spec S5.2.9) */ +#define FADT_WBINVD 0x00000001 /* WBINVD supported */ +#define FADT_PROC_C1 0x00000004 /* C1 via HLT supported */ + +/* FADT IA-PC Boot Architecture flags */ +#define FADT_LEGACY_DEVICES 0x0001 +#define FADT_KBD_CTRL 0x0002 /* 8042 keyboard controller present */ + +/* MADT header flags */ +#define MADT_PCAT_COMPAT 0x00000001 /* 8259 PIC present and initialised */ + +/* MADT subtable types */ +#define MADT_TYPE_LAPIC 0 +#define MADT_TYPE_IOAPIC 1 + +/* MADT Local APIC flags */ +#define LAPIC_ENABLED 0x00000001 + + +/* ACPI structure definitions (ACPI 2.0 / ACPI spec S5.2) */ + +/* + * Root System Description Pointer. + * ACPI 1.0 portion: first 20 bytes (rsdp_checksum covers these). + * ACPI 2.0 extension: bytes 20-35 (rsdp_ext_checksum covers all 36). + */ +struct acpi_rsdp { + uint8_t rsdp_signature[8]; /* "RSD PTR " */ + uint8_t rsdp_checksum; /* sum of bytes 0-19 == 0 */ + uint8_t rsdp_oemid[6]; + uint8_t rsdp_revision; /* 2 = ACPI 2.0+ */ + uint32_t rsdp_rsdt; /* 32-bit RSDT addr (unused) */ + uint32_t rsdp_length; /* total length (36) */ + uint64_t rsdp_xsdt; /* 64-bit XSDT physical addr */ + uint8_t rsdp_ext_checksum; /* sum of all 36 bytes == 0 */ + uint8_t rsdp_reserved[3]; +} __packed; + +/* Standard 36-byte header present in all SDTs (except FACS). */ +struct acpi_table_hdr { + uint8_t th_signature[4]; + uint32_t th_length; + uint8_t th_revision; + uint8_t th_checksum; + uint8_t th_oemid[6]; + uint8_t th_oem_tableid[8]; + uint32_t th_oem_revision; + uint8_t th_creator_id[4]; + uint32_t th_creator_revision; +} __packed; + +/* + * Maximum number of SDT pointers the XSDT can hold. 
Only the first + * ntables slots are populated; th_length reflects the actual size so + * guests do not follow the trailing zero entries. + */ +#define ACPI_XSDT_MAX_TABLES 8 + +/* XSDT: points to other tables via 64-bit physical addresses. */ +struct acpi_xsdt { + struct acpi_table_hdr x_hdr; + uint64_t x_tables[ACPI_XSDT_MAX_TABLES]; +} __packed; + +/* + * Layout of the etc/acpi/tables fw_cfg blob: one contiguous buffer + * containing XSDT, FADT, MADT and DSDT packed sequentially. + * Computed by acpi_build_tables_blob() and consumed by acpi_build_loader(). + */ +struct acpi_blob_layout { + size_t xsdt_off, xsdt_sz; + size_t fadt_off, fadt_sz; + size_t madt_off, madt_sz; + size_t dsdt_off, dsdt_sz; + size_t total_sz; +}; + +/* + * Firmware ACPI Control Structure. + * Special: no standard table header, no checksum field. + */ +struct acpi_facs { + uint8_t facs_signature[4]; /* "FACS" */ + uint32_t facs_length; /* 64 */ + uint32_t facs_hw_signature; + uint32_t facs_wakeup_vector; + uint32_t facs_global_lock; + uint32_t facs_flags; + uint64_t facs_x_wakeup_vector; + uint8_t facs_version; /* 2 = ACPI 2.0 */ + uint8_t facs_reserved[31]; +} __packed; + +/* Empty DSDT (no AML), referenced by FADT */ +struct acpi_dsdt { + struct acpi_table_hdr d_hdr; +} __packed; + +/* + * Generic Address Structure: encodes a register location. + * Used in the FADT extended fields. + */ +struct acpi_gas { + uint8_t gas_addrspace; /* 0 = memory, 1 = I/O */ + uint8_t gas_bitwidth; + uint8_t gas_bitoffset; + uint8_t gas_accesssize; + uint64_t gas_address; +} __packed; + +/* Fixed ACPI Description Table, ACPI 2.0 (revision 3). 
*/ +struct acpi_fadt { + struct acpi_table_hdr f_hdr; /* "FACP", rev 3 */ + uint32_t f_firmware_ctrl; /* 32-bit FACS addr */ + uint32_t f_dsdt; /* 32-bit DSDT addr */ + uint8_t f_int_model; /* obsolete in rev >= 3 */ + uint8_t f_pm_profile; + uint16_t f_sci_int; /* SCI IRQ number */ + uint32_t f_smi_cmd; /* 0 = no SMI port */ + uint8_t f_acpi_enable; + uint8_t f_acpi_disable; + uint8_t f_s4bios_req; + uint8_t f_pstate_cnt; + uint32_t f_pm1a_evt_blk; + uint32_t f_pm1b_evt_blk; + uint32_t f_pm1a_cnt_blk; + uint32_t f_pm1b_cnt_blk; + uint32_t f_pm2_cnt_blk; + uint32_t f_pm_tmr_blk; + uint32_t f_gpe0_blk; + uint32_t f_gpe1_blk; + uint8_t f_pm1_evt_len; + uint8_t f_pm1_cnt_len; + uint8_t f_pm2_cnt_len; + uint8_t f_pm_tmr_len; + uint8_t f_gpe0_blk_len; + uint8_t f_gpe1_blk_len; + uint8_t f_gpe1_base; + uint8_t f_cst_cnt; + uint16_t f_p_lvl2_lat; + uint16_t f_p_lvl3_lat; + uint16_t f_flush_size; + uint16_t f_flush_stride; + uint8_t f_duty_offset; + uint8_t f_duty_width; + uint8_t f_day_alrm; + uint8_t f_mon_alrm; + uint8_t f_century; + uint16_t f_iapc_boot_arch; + uint8_t f_reserved1; + uint32_t f_flags; + struct acpi_gas f_reset_reg; /* reset register (unused) */ + uint8_t f_reset_value; + uint8_t f_reserved2[3]; + uint64_t f_x_firmware_ctrl; /* 64-bit FACS addr */ + uint64_t f_x_dsdt; /* 64-bit DSDT addr */ + struct acpi_gas f_x_pm1a_evt_blk; + struct acpi_gas f_x_pm1b_evt_blk; + struct acpi_gas f_x_pm1a_cnt_blk; + struct acpi_gas f_x_pm1b_cnt_blk; + struct acpi_gas f_x_pm2_cnt_blk; + struct acpi_gas f_x_pm_tmr_blk; + struct acpi_gas f_x_gpe0_blk; + struct acpi_gas f_x_gpe1_blk; +} __packed; + +/* + * Multiple APIC Description Table header. + * Followed immediately by a variable-length list of APIC subtables. + */ +struct acpi_madt_hdr { + struct acpi_table_hdr m_hdr; /* "APIC", rev 1 */ + uint32_t m_lapic_addr; /* Local APIC physical base */ + uint32_t m_flags; +} __packed; + +/* MADT subtable: Processor Local APIC (type 0). 
*/ +struct acpi_madt_lapic { + uint8_t ml_type; /* MADT_TYPE_LAPIC */ + uint8_t ml_length; /* sizeof(struct acpi_madt_lapic) */ + uint8_t ml_proc_id; /* ACPI processor ID */ + uint8_t ml_apic_id; /* Local APIC ID */ + uint32_t ml_flags; /* LAPIC_ENABLED */ +} __packed; + + +void acpi_write_tables(size_t ncpus); +void acpi_register_fw_cfg(size_t ncpus); + +#endif /* _ACPI_H_ */ diff --git a/usr.sbin/vmd/fw_cfg.c b/usr.sbin/vmd/fw_cfg.c index 3d096f9a4a5..ef541f384e1 100644 --- a/usr.sbin/vmd/fw_cfg.c +++ b/usr.sbin/vmd/fw_cfg.c @@ -23,6 +23,7 @@ #include #include +#include "acpi.h" #include "atomicio.h" #include "pci.h" #include "vmd.h" @@ -96,6 +97,8 @@ fw_cfg_init(struct vmop_create_params *vmc) } fw_cfg_add_file("etc/e820", &e820, e820_len); + acpi_register_fw_cfg(vmc->vmc_ncpus); + /* do not double print chars on serial port */ fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd)); diff --git a/usr.sbin/vmd/fw_cfg.h b/usr.sbin/vmd/fw_cfg.h index b367584d5d7..9f07a923481 100644 --- a/usr.sbin/vmd/fw_cfg.h +++ b/usr.sbin/vmd/fw_cfg.h @@ -25,6 +25,10 @@ #define FW_CFG_IO_DMA_ADDR_HIGH 0x514 #define FW_CFG_IO_DMA_ADDR_LOW 0x518 +#define FW_CFG_TABLE_LOADER "etc/table-loader" +#define FW_CFG_ACPI_TABLES "etc/acpi/tables" +#define FW_CFG_ACPI_RSDP "etc/acpi/rsdp" + void fw_cfg_init(struct vmop_create_params *); uint8_t vcpu_exit_fw_cfg(struct vm_run_params *); uint8_t vcpu_exit_fw_cfg_dma(struct vm_run_params *); diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c index 5337204a1a0..cd00023f17b 100644 --- a/usr.sbin/vmd/loadfile_elf.c +++ b/usr.sbin/vmd/loadfile_elf.c @@ -99,6 +99,7 @@ #include #include +#include "acpi.h" #include "loadfile.h" #include "vmd.h" @@ -254,6 +255,8 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, bootargsz = push_bootargs(memmap, n, bootmac); stacksize = push_stack(bootargsz, marks[MARK_END]); + acpi_write_tables(vm->vm_params.vmc_ncpus); + vrs->vrs_gprs[VCPU_REGS_RIP] = 
(uint64_t)marks[MARK_ENTRY]; vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize; vrs->vrs_gdtr.vsi_base = GDT_PAGE; -- 2.54.0