Download raw body.
[4/5] vmd: generate and expose ACPI tables to the guest via fw_cfg
This is a series of commits to expose ACPI tables to vmd guests.
These were committed and tested individually.
They can be reviewed in their entirety here:
https://github.com/openbsd/src/compare/master...nomadium:src:add-support-for-acpi-in-vmd.patch
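Generate a minimal ACPI table set (RSDP, XSDT, MADT, FADT, DSDT) and
expose it to SeaBIOS guests via the QEMU fw_cfg table-loader protocol
(etc/acpi/rsdp, etc/acpi/tables, and etc/table-loader fw_cfg files).
For direct kernel boot, the same tables plus a FACS are written
straight into guest memory starting at 0xe0000.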
---
usr.sbin/vmd/Makefile | 1 +
usr.sbin/vmd/acpi.c | 567 ++++++++++++++++++++++++++++++++++++
usr.sbin/vmd/acpi.h | 228 +++++++++++++++
usr.sbin/vmd/fw_cfg.c | 3 +
usr.sbin/vmd/fw_cfg.h | 4 +
usr.sbin/vmd/loadfile_elf.c | 3 +
6 files changed, 806 insertions(+)
create mode 100644 usr.sbin/vmd/acpi.c
create mode 100644 usr.sbin/vmd/acpi.h
diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile
index bc7a159defc..153fb5f93c7 100644
--- a/usr.sbin/vmd/Makefile
+++ b/usr.sbin/vmd/Makefile
@@ -9,6 +9,7 @@ SRCS+= vioscsi.c vioraw.c vioqcow2.c vm_agentx.c vioblk.c
SRCS+= vionet.c
.if ${MACHINE} == "amd64"
+SRCS+= acpi.c
SRCS+= i8253.c i8259.c fw_cfg.c loadfile_elf.c mc146818.c ns8250.c
SRCS+= x86_vm.c x86_mmio.c
SRCS+= psp.c sev.c
diff --git a/usr.sbin/vmd/acpi.c b/usr.sbin/vmd/acpi.c
new file mode 100644
index 00000000000..6fed77140ff
--- /dev/null
+++ b/usr.sbin/vmd/acpi.c
@@ -0,0 +1,567 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2026 Miguel Landaeta <miguel@miguel.cc>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <dev/vmm/vmm.h>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "fw_cfg.h"
+#include "vmd.h"
+#include "vmm.h"
+#include "acpi.h"
+
+/*
+ * Minimal ACPI table generation for vmd guests.
+ *
+ * Places a chain of ACPI tables in the BIOS-reserved area
+ * (0xe0000-0xfffff) so the guest kernel can find and validate them
+ * via its standard RSDP scan.
+ *
+ * Tables written:
+ * RSDP @ ACPI_RSDP_BASE Root System Description Pointer (ACPI 2.0)
+ * XSDT @ ACPI_XSDT_BASE Extended System Description Table
+ * FADT @ ACPI_FADT_BASE Fixed ACPI Description Table (rev 3)
+ * FACS @ ACPI_FACS_BASE Firmware ACPI Control Structure
+ * DSDT @ ACPI_DSDT_BASE Differentiated System Description Table (empty)
+ * MADT @ ACPI_MADT_BASE Multiple APIC Description Table
+ *
+ * The MADT contains one Local APIC entry per configured VCPU but no
+ * I/O APIC entry. With MADT_PCAT_COMPAT flag set, the guest uses the
+ * emulated i8259 PIC for interrupt routing. An I/O APIC entry is not added
+ * yet, because it would cause ioapic_attach() to succeed and acpimadt_attach()
+ * would switch mp_busses to APIC-mode routing, both of which break vmd's
+ * PIC-based interrupt delivery.
+ *
+ * vmm does not expose CPUIDECX_X2APIC in the guest CPUID leaf, so the
+ * guest uses xAPIC MMIO at 0xfee00000, which falls inside vmd's PCI MMIO
+ * BAR range and is backed by ordinary guest RAM. LAPIC accesses are
+ * silently absorbed; interrupt delivery uses vmm's VMX injection and does
+ * not depend on the guest LAPIC register state.
+ */
+
+#define RSDP_SIG "RSD PTR "
+#define VMD_ACPI_OEM_ID "OpenBS"
+#define VMD_ACPI_TABLE_ID "OpenBSVM"
+#define VMD_ACPI_CREATOR_ID "OVMD"
+
+#define ACPI_REV_1 1
+#define ACPI_REV_2 2
+#define ACPI_REV_3 3
+
+
+static uint8_t
+acpi_checksum(const void *buf, size_t len)
+{
+	const uint8_t *bytes = buf;
+	uint8_t total = 0;
+	/* Return the two's-complement negation of the 8-bit sum of buf. */
+	for (size_t idx = 0; idx < len; idx++)
+		total += bytes[idx];
+	return ((uint8_t)(0 - total));
+}
+
+/*
+ * Fill the standard 36-byte ACPI SDT header.
+ * The caller must zero the table first and compute+store the checksum
+ * last, after all other fields are set.
+ */
+static void
+fill_hdr(struct acpi_table_hdr *hdr, const char sig[4],
+    uint32_t length, uint8_t revision)
+{
+	/* Fixed-width identifiers: copied without a terminating NUL. */
+	memcpy(hdr->th_signature, sig, sizeof(hdr->th_signature));
+	memcpy(hdr->th_oemid, VMD_ACPI_OEM_ID, sizeof(hdr->th_oemid));
+	memcpy(hdr->th_oem_tableid, VMD_ACPI_TABLE_ID, sizeof(hdr->th_oem_tableid));
+	memcpy(hdr->th_creator_id, VMD_ACPI_CREATOR_ID, sizeof(hdr->th_creator_id));
+	hdr->th_oem_revision = hdr->th_creator_revision = ACPI_REV_1;
+	hdr->th_revision = revision;
+	hdr->th_length = length;
+	hdr->th_checksum = 0;	/* the caller stores the real value last */
+}
+
+static void
+acpi_write_rsdp(void)
+{
+	struct acpi_rsdp root;
+
+	/* Start from all-zero so both checksum bytes are 0 when summed. */
+	memset(&root, 0, sizeof(root));
+	root.rsdp_xsdt = ACPI_XSDT_BASE;
+	root.rsdp_length = sizeof(root);
+	root.rsdp_revision = ACPI_REV_2;
+	memcpy(root.rsdp_oemid, VMD_ACPI_OEM_ID, sizeof(root.rsdp_oemid));
+	memcpy(root.rsdp_signature, RSDP_SIG, sizeof(root.rsdp_signature));
+
+	/* Legacy checksum spans bytes 0-19; the extended one spans it all. */
+	root.rsdp_checksum = acpi_checksum(&root, 20);
+	root.rsdp_ext_checksum = acpi_checksum(&root, sizeof(root));
+	if (write_mem(ACPI_RSDP_BASE, &root, sizeof(root)) != 0)
+		fatalx("%s: failed to write RSDP", __func__);
+}
+
+static void
+acpi_write_xsdt(void)
+{
+	struct acpi_xsdt root = {0};
+	const size_t used = 2;	/* FADT and MADT */
+	size_t nbytes;
+
+	/*
+	 * Advertise and write only the populated prefix of the table.
+	 * struct acpi_xsdt reserves ACPI_XSDT_MAX_TABLES slots, and guests
+	 * derive the entry count from (th_length - sizeof(header)) / 8, so
+	 * unused zero slots would be read as pointers to physical address 0.
+	 */
+	nbytes = sizeof(root.x_hdr) + used * sizeof(root.x_tables[0]);
+	fill_hdr(&root.x_hdr, "XSDT", nbytes, ACPI_REV_1);
+	root.x_tables[0] = ACPI_FADT_BASE;
+	root.x_tables[1] = ACPI_MADT_BASE;
+	root.x_hdr.th_checksum = acpi_checksum(&root, nbytes);
+
+	if (write_mem(ACPI_XSDT_BASE, &root, nbytes))
+		fatalx("%s: failed to write XSDT", __func__);
+}
+
+static void
+acpi_write_facs(void)
+{
+	/* No checksum step: the FACS layout has no checksum field. */
+	struct acpi_facs ctl = {
+		.facs_signature = { 'F', 'A', 'C', 'S' },
+		.facs_length = sizeof(struct acpi_facs),
+		.facs_version = ACPI_REV_2,
+	};
+
+	if (write_mem(ACPI_FACS_BASE, &ctl, sizeof(ctl)))
+		fatalx("%s: failed to write FACS", __func__);
+}
+
+static void
+acpi_write_dsdt(void)
+{
+	struct acpi_dsdt empty;
+	/* Header-only table: there is no AML body after the header. */
+	memset(&empty, 0, sizeof(empty));
+	fill_hdr(&empty.d_hdr, "DSDT", sizeof(empty), ACPI_REV_2);
+	empty.d_hdr.th_checksum = acpi_checksum(&empty, sizeof(empty));
+	if (write_mem(ACPI_DSDT_BASE, &empty, sizeof(empty)) != 0)
+		fatalx("%s: failed to write DSDT", __func__);
+}
+
+static void
+acpi_write_fadt(void)
+{
+	struct acpi_fadt fixed = {0};
+
+	/* "FACP" is the signature the spec assigns to the FADT. */
+	fill_hdr(&fixed.f_hdr, "FACP", sizeof(fixed), ACPI_REV_3);
+
+	/* FACS and DSDT are published through both pointer widths. */
+	fixed.f_firmware_ctrl = ACPI_FACS_BASE;
+	fixed.f_x_firmware_ctrl = ACPI_FACS_BASE;
+	fixed.f_dsdt = ACPI_DSDT_BASE;
+	fixed.f_x_dsdt = ACPI_DSDT_BASE;
+
+	/*
+	 * f_smi_cmd and every other field not assigned here stay zero
+	 * from the initializer above.
+	 */
+	fixed.f_pm_profile = 1;		/* Desktop */
+	fixed.f_sci_int = 9;		/* SCI on ISA IRQ 9 */
+	fixed.f_flags = FADT_WBINVD | FADT_PROC_C1;
+	fixed.f_iapc_boot_arch = FADT_LEGACY_DEVICES | FADT_KBD_CTRL;
+
+	fixed.f_hdr.th_checksum = acpi_checksum(&fixed, sizeof(fixed));
+	if (write_mem(ACPI_FADT_BASE, &fixed, sizeof(fixed)))
+		fatalx("%s: failed to write FADT", __func__);
+}
+
+static void
+acpi_write_madt(size_t ncpus)
+{
+	/*
+	 * Build the MADT in a local buffer so that one checksum can be
+	 * computed over the complete variable-length table.  The buffer holds
+	 * the header plus at most VMM_MAX_VCPUS_PER_VM Local APIC entries.
+	 */
+	uint8_t buf[sizeof(struct acpi_madt_hdr) +
+	    VMM_MAX_VCPUS_PER_VM * sizeof(struct acpi_madt_lapic)];
+	struct acpi_madt_hdr *madt;
+	struct acpi_madt_lapic *lapic;
+	uint32_t madt_len;
+	size_t i;
+
+	/* A larger count would write LAPIC entries past the end of buf. */
+	if (ncpus > VMM_MAX_VCPUS_PER_VM)
+		fatalx("%s: too many VCPUs (%zu)", __func__, ncpus);
+
+	madt_len = sizeof(struct acpi_madt_hdr) +
+	    ncpus * sizeof(struct acpi_madt_lapic);
+
+	memset(buf, 0, sizeof(buf));
+	madt = (struct acpi_madt_hdr *)buf;
+	fill_hdr(&madt->m_hdr, "APIC", madt_len, ACPI_REV_1);
+	madt->m_lapic_addr = LAPIC_BASE;
+	/*
+	 * MADT_PCAT_COMPAT: signals that the i8259 PIC is present and
+	 * initialised by firmware. The guest uses the PIC for interrupt
+	 * routing, which vmd already emulates. No I/O APIC entry is
+	 * included; adding one would cause the guest to access IOAPIC MMIO
+	 * at 0xfec00000, which vmd does not yet handle.
+	 */
+	madt->m_flags = MADT_PCAT_COMPAT;
+
+	/* One enabled Local APIC entry per VCPU, with ACPI ID == APIC ID. */
+	lapic = (struct acpi_madt_lapic *)(buf + sizeof(*madt));
+	for (i = 0; i < ncpus; i++) {
+		lapic[i].ml_type = MADT_TYPE_LAPIC;
+		lapic[i].ml_length = sizeof(struct acpi_madt_lapic);
+		lapic[i].ml_proc_id = (uint8_t)i;
+		lapic[i].ml_apic_id = (uint8_t)i;
+		lapic[i].ml_flags = LAPIC_ENABLED;
+	}
+
+	madt->m_hdr.th_checksum = acpi_checksum(buf, madt_len);
+	if (write_mem(ACPI_MADT_BASE, buf, madt_len))
+		fatalx("%s: failed to write MADT", __func__);
+}
+
+/* QEMU/SeaBIOS table-loader protocol */
+/*
+ * SeaBIOS reads ACPI tables from three fw_cfg files:
+ *
+ * etc/table-loader ordered list of 128-byte command entries
+ * etc/acpi/tables contiguous blob: XSDT, FADT, MADT, DSDT, etc
+ * etc/acpi/rsdp the 36-byte RSDP
+ *
+ * The tables blob is built with RELATIVE offsets in all physical-address
+ * fields. At runtime SeaBIOS allocates the blob in high RAM, then runs
+ * the ADD_POINTER commands which ADD the blob's allocated base address to
+ * each relative offset, converting them to absolute physical addresses.
+ * ADD_CHECKSUM commands then recompute ACPI table checksums over the
+ * patched data.
+ *
+ * This is the mechanism used by QEMU, described in:
+ * https://github.com/qemu/qemu/blob/v11.0.0/hw/acpi/bios-linker-loader.c
+ */
+
+#define LOADER_FILESZ 56 /* max filename length including NUL */
+#define LOADER_ENTRY_SIZE 128 /* every command entry is exactly this */
+
+#define LOADER_ALLOCATE 1
+#define LOADER_ADD_POINTER 2
+#define LOADER_ADD_CHECKSUM 3
+
+#define LOADER_ZONE_HIGH 1 /* allocate in RAM above 1 MB */
+#define LOADER_ZONE_FSEG 2 /* allocate in 0xe0000-0xfffff */
+
+struct loader_entry {
+ uint32_t command;
+ uint8_t data[124]; /* layout determined by command, see helpers below */
+} __packed;
+
+_Static_assert(sizeof(struct loader_entry) == LOADER_ENTRY_SIZE,
+ "loader_entry must be exactly 128 bytes");
+
+/*
+ * fw_cfg table-loader helpers
+ */
+
+/* LOADER_ALLOCATE: allocate <file> aligned to <align> in <zone>. */
+static void
+loader_add_alloc(struct loader_entry *e, const char *file,
+    uint32_t align, uint8_t zone)
+{
+	*e = (struct loader_entry){ .command = LOADER_ALLOCATE };
+	/* Payload layout: file name, 32-bit alignment, zone byte. */
+	strlcpy((char *)e->data, file, LOADER_FILESZ);
+	memcpy(&e->data[LOADER_FILESZ], &align, sizeof(align));
+	e->data[LOADER_FILESZ + sizeof(align)] = zone;
+}
+
+/*
+ * LOADER_ADD_POINTER: at byte <offset> in <dest>, add the allocated base
+ * address of <src> to the existing <size>-byte little-endian value.
+ *
+ * Because the initial value in the tables blob is the relative offset of
+ * the pointed-to table from the blob start, adding the blob base yields
+ * the correct absolute physical address.
+ */
+static void
+loader_add_pointer(struct loader_entry *e, const char *dest, const char *src,
+    uint32_t offset, uint8_t size)
+{
+	uint8_t *tail = &e->data[2 * LOADER_FILESZ];	/* after both names */
+	*e = (struct loader_entry){ .command = LOADER_ADD_POINTER };
+	strlcpy((char *)&e->data[0], dest, LOADER_FILESZ);
+	strlcpy((char *)&e->data[LOADER_FILESZ], src, LOADER_FILESZ);
+	memcpy(tail, &offset, sizeof(offset));
+	tail[sizeof(offset)] = size;
+}
+
+/*
+ * LOADER_ADD_CHECKSUM: set the byte at <offset> in <file> so that the
+ * arithmetic sum of bytes [start, start+length) is zero mod 256.
+ */
+static void
+loader_add_checksum(struct loader_entry *e, const char *file,
+    uint32_t offset, uint32_t start, uint32_t length)
+{
+	/* Three consecutive 32-bit fields follow the file name. */
+	const uint32_t fields[3] = { offset, start, length };
+
+	*e = (struct loader_entry){ .command = LOADER_ADD_CHECKSUM };
+	strlcpy((char *)e->data, file, LOADER_FILESZ);
+	memcpy(&e->data[LOADER_FILESZ], fields, sizeof(fields));
+}
+
+
+/* Build the etc/acpi/tables blob: one contiguous buffer containing XSDT,
+ * FADT, MADT and DSDT packed sequentially. Fills *l with the size and
+ * byte offset of each table within the buffer.
+ *
+ * All physical-address fields (XSDT entries, FADT DSDT pointers) are
+ * initialised with their target table's offset from the blob start.
+ * ADD_POINTER commands in the loader convert these to absolute addresses
+ * at run time. All th_checksum fields except DSDT's are zeroed here;
+ * ADD_CHECKSUM commands recompute them after pointer patching. DSDT has
+ * no pointer fields so its checksum is stable and computed immediately.
+ *
+ * Returns a heap-allocated buffer the caller must free().
+ */
+static uint8_t *
+acpi_build_tables_blob(size_t ncpus, struct acpi_blob_layout *l)
+{
+ struct acpi_xsdt *xsdt;
+ struct acpi_fadt *fadt;
+ struct acpi_madt_hdr *madt;
+ struct acpi_madt_lapic *lapic;
+ struct acpi_dsdt *dsdt;
+ uint8_t *tables;
+ size_t i;
+
+ /* Sizes: XSDT accounts only for the two populated table pointers. */
+ l->xsdt_sz = sizeof(struct acpi_table_hdr) + 2 * sizeof(uint64_t);
+ l->fadt_sz = sizeof(struct acpi_fadt);
+ l->madt_sz = sizeof(struct acpi_madt_hdr) +
+ ncpus * sizeof(struct acpi_madt_lapic);
+ l->dsdt_sz = sizeof(struct acpi_dsdt);
+
+ /* Offsets: tables are packed sequentially in the blob. */
+ l->xsdt_off = 0;
+ l->fadt_off = l->xsdt_off + l->xsdt_sz;
+ l->madt_off = l->fadt_off + l->fadt_sz;
+ l->dsdt_off = l->madt_off + l->madt_sz;
+ l->total_sz = l->dsdt_off + l->dsdt_sz;
+
+ tables = calloc(1, l->total_sz);
+ if (tables == NULL)
+ fatal("%s: calloc", __func__);
+
+ /* XSDT: entries hold relative offsets; ADD_POINTER makes them absolute. */
+ xsdt = (struct acpi_xsdt *)(tables + l->xsdt_off);
+ fill_hdr(&xsdt->x_hdr, "XSDT", l->xsdt_sz, ACPI_REV_1);
+ xsdt->x_tables[0] = l->fadt_off;
+ xsdt->x_tables[1] = l->madt_off;
+
+ /* FADT: DSDT pointers hold relative offsets; patched by ADD_POINTER. */
+ fadt = (struct acpi_fadt *)(tables + l->fadt_off);
+ /* not a typo: "FACP" (Fixed ACPI Control and Power) is the spec-mandated signature */
+ fill_hdr(&fadt->f_hdr, "FACP", l->fadt_sz, ACPI_REV_3);
+
+ fadt->f_firmware_ctrl = 0; /* No FACS; S3 not supported */
+ fadt->f_dsdt = l->dsdt_off; /* relative; patched by ADD_POINTER */
+ fadt->f_pm_profile = 1; /* Desktop */
+ fadt->f_sci_int = 9;
+ fadt->f_flags = FADT_WBINVD | FADT_PROC_C1;
+ fadt->f_iapc_boot_arch = FADT_LEGACY_DEVICES | FADT_KBD_CTRL;
+ fadt->f_x_firmware_ctrl = 0; /* No FACS. */
+ fadt->f_x_dsdt = l->dsdt_off; /* relative; patched by ADD_POINTER */
+
+ /* MADT: one Local APIC per VCPU */
+ madt = (struct acpi_madt_hdr *)(tables + l->madt_off);
+ lapic = (struct acpi_madt_lapic *)(tables + l->madt_off + sizeof(*madt));
+ fill_hdr(&madt->m_hdr, "APIC", l->madt_sz, ACPI_REV_1);
+ madt->m_lapic_addr = LAPIC_BASE;
+ madt->m_flags = MADT_PCAT_COMPAT;
+ for (i = 0; i < ncpus; i++) {
+ lapic[i].ml_type = MADT_TYPE_LAPIC;
+ lapic[i].ml_length = sizeof(struct acpi_madt_lapic);
+ lapic[i].ml_proc_id = (uint8_t)i;
+ lapic[i].ml_apic_id = (uint8_t)i;
+ lapic[i].ml_flags = LAPIC_ENABLED;
+ }
+
+ /* DSDT: no address fields, checksum is final now. */
+ dsdt = (struct acpi_dsdt *)(tables + l->dsdt_off);
+ fill_hdr(&dsdt->d_hdr, "DSDT", l->dsdt_sz, ACPI_REV_2);
+ dsdt->d_hdr.th_checksum = acpi_checksum(dsdt, l->dsdt_sz);
+
+ return tables;
+}
+
+/* Build the etc/table-loader command sequence that tells SeaBIOS how to
+ * allocate, patch and checksum the tables blob and RSDP blob.
+ *
+ * Command ordering is protocol-mandated: ADD_POINTER commands must
+ * precede ADD_CHECKSUM commands for the regions they affect, so that
+ * checksums are computed over already-patched data.
+ *
+ * Sets *ncmds to the number of entries written.
+ * Returns a heap-allocated array the caller must free().
+ */
+static struct loader_entry *
+acpi_build_loader(const struct acpi_blob_layout *l, int *ncmds)
+{
+ struct loader_entry *loader;
+ int n, maxcmds;
+
+ /*
+ * 2 ALLOC (tables blob, rsdp blob)
+ * 2 ADD_POINTER (XSDT entries: FADT, MADT)
+ * 2 ADD_POINTER (FADT DSDT: 32-bit f_dsdt, 64-bit f_x_dsdt)
+ * 3 ADD_CHECKSUM (XSDT, FADT, MADT - after all pointers patched)
+ * 1 ADD_POINTER (RSDP rsdp_xsdt -> XSDT)
+ * 2 ADD_CHECKSUM (RSDP: ACPI 1.0 first-20, ACPI 2.0 all-36)
+ */
+ maxcmds = 2 + 2 + 2 + 3 + 1 + 2;
+ loader = calloc(maxcmds, sizeof(*loader));
+ if (loader == NULL)
+ fatal("%s: calloc", __func__);
+ n = 0;
+
+ /*
+ * Use LOADER_ZONE_FSEG (0xe0000-0xfffff) rather than LOADER_ZONE_HIGH.
+ * ZONE_HIGH places the blob at the top of RAM, but SeaBIOS's
+ * table-loader success path returns early before acpi_setup() runs, so
+ * the e820 reservation for that region is never written. The guest OS
+ * then treats those pages as free RAM, overwrites the tables, and
+ * crashes when ACPI pointers are dereferenced. FSEG is always marked
+ * reserved; the OS never allocates from there.
+ */
+ loader_add_alloc(&loader[n++], FW_CFG_ACPI_TABLES, 64, LOADER_ZONE_FSEG);
+ loader_add_alloc(&loader[n++], FW_CFG_ACPI_RSDP, 16, LOADER_ZONE_FSEG);
+
+ /* Patch XSDT entries (relative offsets -> absolute addresses). */
+ loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES,
+ l->xsdt_off + offsetof(struct acpi_xsdt, x_tables[0]), 8);
+ loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES,
+ l->xsdt_off + offsetof(struct acpi_xsdt, x_tables[1]), 8);
+
+ /* Patch FADT's DSDT pointer in both 32-bit and 64-bit fields. */
+ loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES,
+ l->fadt_off + offsetof(struct acpi_fadt, f_dsdt), 4);
+ loader_add_pointer(&loader[n++], FW_CFG_ACPI_TABLES, FW_CFG_ACPI_TABLES,
+ l->fadt_off + offsetof(struct acpi_fadt, f_x_dsdt), 8);
+
+ /* Recompute table checksums after all pointer patching. */
+ loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES,
+ l->xsdt_off + offsetof(struct acpi_table_hdr, th_checksum),
+ l->xsdt_off, l->xsdt_sz);
+ loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES,
+ l->fadt_off + offsetof(struct acpi_table_hdr, th_checksum),
+ l->fadt_off, l->fadt_sz);
+ loader_add_checksum(&loader[n++], FW_CFG_ACPI_TABLES,
+ l->madt_off + offsetof(struct acpi_table_hdr, th_checksum),
+ l->madt_off, l->madt_sz);
+
+ /* Patch RSDP's XSDT address (XSDT is at offset 0 of the blob). */
+ loader_add_pointer(&loader[n++], FW_CFG_ACPI_RSDP, FW_CFG_ACPI_TABLES,
+ offsetof(struct acpi_rsdp, rsdp_xsdt), 8);
+
+ /* Recompute RSDP checksums after the XSDT address is written. */
+ loader_add_checksum(&loader[n++], FW_CFG_ACPI_RSDP,
+ offsetof(struct acpi_rsdp, rsdp_checksum),
+ 0, 20); /* ACPI 1.0: first 20 bytes */
+ loader_add_checksum(&loader[n++], FW_CFG_ACPI_RSDP,
+ offsetof(struct acpi_rsdp, rsdp_ext_checksum),
+ 0, sizeof(struct acpi_rsdp)); /* ACPI 2.0: all 36 bytes */
+
+ if (n != maxcmds)
+ fatalx("%s: loader command count mismatch", __func__);
+
+ *ncmds = n;
+ return loader;
+}
+
+/*
+ * acpi_register_fw_cfg - expose ACPI tables to SeaBIOS via fw_cfg.
+ *
+ * SeaBIOS manages the 0xe0000-0xfffff region as its own "high table"
+ * zone and overwrites anything vmd wrote there during POST. The correct
+ * handoff is the QEMU/SeaBIOS table-loader protocol: vmd registers three
+ * fw_cfg files and SeaBIOS allocates, patches, and checksums the tables
+ * at run time inside its own ACPI initialisation sequence.
+ */
+void
+acpi_register_fw_cfg(size_t ncpus)
+{
+ struct acpi_blob_layout l;
+ struct acpi_rsdp rsdp = {0};
+ uint8_t *tables;
+ struct loader_entry *loader;
+ int ncmds;
+
+ tables = acpi_build_tables_blob(ncpus, &l);
+
+ /*
+ * rsdp_xsdt = 0: XSDT is at offset 0 of the tables blob.
+ * ADD_POINTER converts it to the absolute physical address at run time.
+ * Both checksum fields are zeroed here; ADD_CHECKSUM fills them in.
+ */
+ memcpy(rsdp.rsdp_signature, RSDP_SIG, sizeof(rsdp.rsdp_signature));
+ memcpy(rsdp.rsdp_oemid, VMD_ACPI_OEM_ID, sizeof(rsdp.rsdp_oemid));
+ rsdp.rsdp_revision = ACPI_REV_2;
+ rsdp.rsdp_length = sizeof(rsdp);
+
+ loader = acpi_build_loader(&l, &ncmds);
+
+ fw_cfg_add_file(FW_CFG_TABLE_LOADER, loader, (size_t)ncmds * sizeof(*loader));
+ fw_cfg_add_file(FW_CFG_ACPI_TABLES, tables, l.total_sz);
+ fw_cfg_add_file(FW_CFG_ACPI_RSDP, &rsdp, sizeof(rsdp));
+
+ log_debug("%s: ACPI tables written (%zu VCPU%s)", __func__,
+ ncpus, ncpus == 1 ? "" : "s");
+
+ free(tables);
+ free(loader);
+}
+
+/*
+ * acpi_write_tables - write ACPI tables into the BIOS scan window.
+ *
+ * Used by the direct kernel boot path (-b): places tables at 0xe0000 so
+ * the kernel's acpi_probe() finds them. No firmware runs to overwrite
+ * this region.
+ */
+void
+acpi_write_tables(size_t ncpus)
+{
+ acpi_write_rsdp();
+ acpi_write_xsdt();
+ acpi_write_facs();
+ acpi_write_dsdt();
+ acpi_write_fadt();
+ acpi_write_madt(ncpus);
+
+ log_debug("%s: ACPI tables written (%zu VCPU%s)", __func__,
+ ncpus, ncpus == 1 ? "" : "s");
+}
diff --git a/usr.sbin/vmd/acpi.h b/usr.sbin/vmd/acpi.h
new file mode 100644
index 00000000000..d05c23da5ef
--- /dev/null
+++ b/usr.sbin/vmd/acpi.h
@@ -0,0 +1,228 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2005 Thorsten Lockert <tholo@sigmasoft.com>
+ * Copyright (c) 2005 Marco Peereboom <marco@openbsd.org>
+ * Copyright (c) 2026 Miguel Landaeta <miguel@miguel.cc>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _ACPI_H_
+#define _ACPI_H_
+
+#include "vmd.h"
+
+/*
+ * Physical base addresses for each table, all within the BIOS-reserved
+ * region 0x90000-0xfffff (VM_MEM_RESERVED, accessible via write_mem).
+ * The RSDP must be 16-byte aligned and within 0xe0000-0xfffff.
+ */
+#define ACPI_RSDP_BASE 0xe0000ULL
+#define ACPI_XSDT_BASE 0xe0040ULL
+#define ACPI_FADT_BASE 0xe0080ULL
+#define ACPI_FACS_BASE 0xe0180ULL
+#define ACPI_DSDT_BASE 0xe01c0ULL
+#define ACPI_MADT_BASE 0xe0200ULL
+
+#define LAPIC_BASE 0xfee00000UL
+
+/* FADT flags (ACPI spec S5.2.9) */
+#define FADT_WBINVD 0x00000001 /* WBINVD supported */
+#define FADT_PROC_C1 0x00000004 /* C1 via HLT supported */
+
+/* FADT IA-PC Boot Architecture flags */
+#define FADT_LEGACY_DEVICES 0x0001
+#define FADT_KBD_CTRL 0x0002 /* 8042 keyboard controller present */
+
+/* MADT header flags */
+#define MADT_PCAT_COMPAT 0x00000001 /* 8259 PIC present and initialised */
+
+/* MADT subtable types */
+#define MADT_TYPE_LAPIC 0
+#define MADT_TYPE_IOAPIC 1
+
+/* MADT Local APIC flags */
+#define LAPIC_ENABLED 0x00000001
+
+
+/* ACPI structure definitions (ACPI 2.0 / ACPI spec S5.2) */
+
+/*
+ * Root System Description Pointer.
+ * ACPI 1.0 portion: first 20 bytes (rsdp_checksum covers these).
+ * ACPI 2.0 extension: bytes 20-35 (rsdp_ext_checksum covers all 36).
+ */
+struct acpi_rsdp {
+ uint8_t rsdp_signature[8]; /* "RSD PTR " */
+ uint8_t rsdp_checksum; /* sum of bytes 0-19 == 0 */
+ uint8_t rsdp_oemid[6];
+ uint8_t rsdp_revision; /* 2 = ACPI 2.0+ */
+ uint32_t rsdp_rsdt; /* 32-bit RSDT addr (unused) */
+ uint32_t rsdp_length; /* total length (36) */
+ uint64_t rsdp_xsdt; /* 64-bit XSDT physical addr */
+ uint8_t rsdp_ext_checksum; /* sum of all 36 bytes == 0 */
+ uint8_t rsdp_reserved[3];
+} __packed;
+
+/* Standard 36-byte header present in all SDTs (except FACS). */
+struct acpi_table_hdr {
+ uint8_t th_signature[4];
+ uint32_t th_length;
+ uint8_t th_revision;
+ uint8_t th_checksum;
+ uint8_t th_oemid[6];
+ uint8_t th_oem_tableid[8];
+ uint32_t th_oem_revision;
+ uint8_t th_creator_id[4];
+ uint32_t th_creator_revision;
+} __packed;
+
+/*
+ * Maximum number of SDT pointers the XSDT can hold. Only the leading
+ * slots actually in use are populated; th_length reflects that size so
+ * guests do not follow the trailing zero entries.
+ */
+#define ACPI_XSDT_MAX_TABLES 8
+
+/* XSDT: points to other tables via 64-bit physical addresses. */
+struct acpi_xsdt {
+ struct acpi_table_hdr x_hdr;
+ uint64_t x_tables[ACPI_XSDT_MAX_TABLES];
+} __packed;
+
+/*
+ * Layout of the etc/acpi/tables fw_cfg blob: one contiguous buffer
+ * containing XSDT, FADT, MADT and DSDT packed sequentially.
+ * Computed by acpi_build_tables_blob() and consumed by acpi_build_loader().
+ */
+struct acpi_blob_layout {
+ size_t xsdt_off, xsdt_sz;
+ size_t fadt_off, fadt_sz;
+ size_t madt_off, madt_sz;
+ size_t dsdt_off, dsdt_sz;
+ size_t total_sz;
+};
+
+/*
+ * Firmware ACPI Control Structure.
+ * Special: no standard table header, no checksum field.
+ */
+struct acpi_facs {
+ uint8_t facs_signature[4]; /* "FACS" */
+ uint32_t facs_length; /* 64 */
+ uint32_t facs_hw_signature;
+ uint32_t facs_wakeup_vector;
+ uint32_t facs_global_lock;
+ uint32_t facs_flags;
+ uint64_t facs_x_wakeup_vector;
+ uint8_t facs_version; /* 2 = ACPI 2.0 */
+ uint8_t facs_reserved[31];
+} __packed;
+
+/* Empty DSDT (no AML), referenced by FADT */
+struct acpi_dsdt {
+ struct acpi_table_hdr d_hdr;
+} __packed;
+
+/*
+ * Generic Address Structure: encodes a register location.
+ * Used in the FADT extended fields.
+ */
+struct acpi_gas {
+ uint8_t gas_addrspace; /* 0 = memory, 1 = I/O */
+ uint8_t gas_bitwidth;
+ uint8_t gas_bitoffset;
+ uint8_t gas_accesssize;
+ uint64_t gas_address;
+} __packed;
+
+/* Fixed ACPI Description Table, ACPI 2.0 (revision 3). */
+struct acpi_fadt {
+ struct acpi_table_hdr f_hdr; /* "FACP", rev 3 */
+ uint32_t f_firmware_ctrl; /* 32-bit FACS addr */
+ uint32_t f_dsdt; /* 32-bit DSDT addr */
+ uint8_t f_int_model; /* obsolete in rev >= 3 */
+ uint8_t f_pm_profile;
+ uint16_t f_sci_int; /* SCI IRQ number */
+ uint32_t f_smi_cmd; /* 0 = no SMI port */
+ uint8_t f_acpi_enable;
+ uint8_t f_acpi_disable;
+ uint8_t f_s4bios_req;
+ uint8_t f_pstate_cnt;
+ uint32_t f_pm1a_evt_blk;
+ uint32_t f_pm1b_evt_blk;
+ uint32_t f_pm1a_cnt_blk;
+ uint32_t f_pm1b_cnt_blk;
+ uint32_t f_pm2_cnt_blk;
+ uint32_t f_pm_tmr_blk;
+ uint32_t f_gpe0_blk;
+ uint32_t f_gpe1_blk;
+ uint8_t f_pm1_evt_len;
+ uint8_t f_pm1_cnt_len;
+ uint8_t f_pm2_cnt_len;
+ uint8_t f_pm_tmr_len;
+ uint8_t f_gpe0_blk_len;
+ uint8_t f_gpe1_blk_len;
+ uint8_t f_gpe1_base;
+ uint8_t f_cst_cnt;
+ uint16_t f_p_lvl2_lat;
+ uint16_t f_p_lvl3_lat;
+ uint16_t f_flush_size;
+ uint16_t f_flush_stride;
+ uint8_t f_duty_offset;
+ uint8_t f_duty_width;
+ uint8_t f_day_alrm;
+ uint8_t f_mon_alrm;
+ uint8_t f_century;
+ uint16_t f_iapc_boot_arch;
+ uint8_t f_reserved1;
+ uint32_t f_flags;
+ struct acpi_gas f_reset_reg; /* reset register (unused) */
+ uint8_t f_reset_value;
+ uint8_t f_reserved2[3];
+ uint64_t f_x_firmware_ctrl; /* 64-bit FACS addr */
+ uint64_t f_x_dsdt; /* 64-bit DSDT addr */
+ struct acpi_gas f_x_pm1a_evt_blk;
+ struct acpi_gas f_x_pm1b_evt_blk;
+ struct acpi_gas f_x_pm1a_cnt_blk;
+ struct acpi_gas f_x_pm1b_cnt_blk;
+ struct acpi_gas f_x_pm2_cnt_blk;
+ struct acpi_gas f_x_pm_tmr_blk;
+ struct acpi_gas f_x_gpe0_blk;
+ struct acpi_gas f_x_gpe1_blk;
+} __packed;
+
+/*
+ * Multiple APIC Description Table header.
+ * Followed immediately by a variable-length list of APIC subtables.
+ */
+struct acpi_madt_hdr {
+ struct acpi_table_hdr m_hdr; /* "APIC", rev 1 */
+ uint32_t m_lapic_addr; /* Local APIC physical base */
+ uint32_t m_flags;
+} __packed;
+
+/* MADT subtable: Processor Local APIC (type 0). */
+struct acpi_madt_lapic {
+ uint8_t ml_type; /* MADT_TYPE_LAPIC */
+ uint8_t ml_length; /* sizeof(struct acpi_madt_lapic) */
+ uint8_t ml_proc_id; /* ACPI processor ID */
+ uint8_t ml_apic_id; /* Local APIC ID */
+ uint32_t ml_flags; /* LAPIC_ENABLED */
+} __packed;
+
+
+void acpi_write_tables(size_t ncpus);
+void acpi_register_fw_cfg(size_t ncpus);
+
+#endif /* _ACPI_H_ */
diff --git a/usr.sbin/vmd/fw_cfg.c b/usr.sbin/vmd/fw_cfg.c
index 3d096f9a4a5..ef541f384e1 100644
--- a/usr.sbin/vmd/fw_cfg.c
+++ b/usr.sbin/vmd/fw_cfg.c
@@ -23,6 +23,7 @@
#include <string.h>
#include <unistd.h>
+#include "acpi.h"
#include "atomicio.h"
#include "pci.h"
#include "vmd.h"
@@ -96,6 +97,8 @@ fw_cfg_init(struct vmop_create_params *vmc)
}
fw_cfg_add_file("etc/e820", &e820, e820_len);
+ acpi_register_fw_cfg(vmc->vmc_ncpus);
+
/* do not double print chars on serial port */
fw_cfg_add_file("etc/screen-and-debug", &sd, sizeof(sd));
diff --git a/usr.sbin/vmd/fw_cfg.h b/usr.sbin/vmd/fw_cfg.h
index b367584d5d7..9f07a923481 100644
--- a/usr.sbin/vmd/fw_cfg.h
+++ b/usr.sbin/vmd/fw_cfg.h
@@ -25,6 +25,10 @@
#define FW_CFG_IO_DMA_ADDR_HIGH 0x514
#define FW_CFG_IO_DMA_ADDR_LOW 0x518
+#define FW_CFG_TABLE_LOADER "etc/table-loader"
+#define FW_CFG_ACPI_TABLES "etc/acpi/tables"
+#define FW_CFG_ACPI_RSDP "etc/acpi/rsdp"
+
void fw_cfg_init(struct vmop_create_params *);
uint8_t vcpu_exit_fw_cfg(struct vm_run_params *);
uint8_t vcpu_exit_fw_cfg_dma(struct vm_run_params *);
diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c
index 5337204a1a0..cd00023f17b 100644
--- a/usr.sbin/vmd/loadfile_elf.c
+++ b/usr.sbin/vmd/loadfile_elf.c
@@ -99,6 +99,7 @@
#include <machine/specialreg.h>
#include <machine/pte.h>
+#include "acpi.h"
#include "loadfile.h"
#include "vmd.h"
@@ -254,6 +255,8 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
bootargsz = push_bootargs(memmap, n, bootmac);
stacksize = push_stack(bootargsz, marks[MARK_END]);
+ acpi_write_tables(vm->vm_params.vmc_ncpus);
+
vrs->vrs_gprs[VCPU_REGS_RIP] = (uint64_t)marks[MARK_ENTRY];
vrs->vrs_gprs[VCPU_REGS_RSP] = (uint64_t)(STACK_PAGE + PAGE_SIZE) - stacksize;
vrs->vrs_gdtr.vsi_base = GDT_PAGE;
--
2.54.0
[4/5] vmd: generate and expose ACPI tables to the guest via fw_cfg