From: Hans-Jörg Höxer Subject: Re: vmd(8): Use 32-bit direct kernel launch for both amd64 and i386 To: Cc: Date: Wed, 19 Nov 2025 12:48:59 +0100 Hi, I'd like to revisit this diff. Here's the context from before the last release: o I want to use 32-bit (legacy) mode for direct kernel launch for both amd64 and i386; this will simplify the SEV-ES related code dealing with #VC traps during locore0 o right now we actually use compatibility mode; this implies that exceptions will be raised by 64 bit rules, etc. o the problem with my initial diff was that it broke VMX/EPT support in vmm(4) by tricking vmm(4) into assuming "restricted" guest mode, even when the CPU actually supports unrestricted guests o mlarkin removed the support for restricted guests [1]; with this my diff works now on both VMX/EPT and SVM/RVI Tests are welcome :-) And oks, too. Take care, HJ. [1] https://marc.info/?l=openbsd-cvs&m=175786504306946&w=2 -------------------------------------------------------------------------- diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c index 5f67953fb50..8925736aece 100644 --- a/usr.sbin/vmd/loadfile_elf.c +++ b/usr.sbin/vmd/loadfile_elf.c @@ -110,15 +110,13 @@ union { } hdr; static void setsegment(struct mem_segment_descriptor *, uint32_t, - size_t, int, int, int, int, int); + size_t, int, int, int, int); static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *); static size_t push_stack(uint32_t, uint32_t); static void push_gdt(void); -static void push_pt_32(void); -static void push_pt_64(void); static void marc4random_buf(paddr_t, int); static void mbzero(paddr_t, int); static void mbcopy(void *, paddr_t, int); @@ -126,8 +124,6 @@ static void mbcopy(void *, paddr_t, int); extern char *__progname; extern int vm_id; -uint64_t pg_crypt 
= 0; - /* * setsegment * @@ -148,7 +144,7 @@ uint64_t pg_crypt = 0; */ static void setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, - int type, int dpl, int def32, int gran, int lm) + int type, int dpl, int def32, int gran) { sd->sd_lolimit = (int)limit; sd->sd_lobase = (int)base; @@ -157,7 +153,7 @@ setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, sd->sd_p = 1; sd->sd_hilimit = (int)limit >> 16; sd->sd_avl = 0; - sd->sd_long = lm; + sd->sd_long = 0; sd->sd_def32 = def32; sd->sd_gran = gran; sd->sd_hibase = (int)base >> 24; @@ -185,71 +181,16 @@ push_gdt(void) * Create three segment descriptors: * * GDT[0] : null descriptor. "Created" via memset above. - * GDT[1] (selector @ 0x8): Executable segment (compat mode), for CS + * GDT[1] (selector @ 0x8): Executable segment, for CS * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS - * GDT[3] (selector @ 0x18): Executable segment (long mode), for CS */ - setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1, 0); - setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1, 0); - setsegment(&sd[3], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 0, 1, 1); + setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); + setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); sev_register_encryption(GDT_PAGE, PAGE_SIZE); } -/* - * push_pt_32 - * - * Create an identity-mapped page directory hierarchy mapping the first - * 4GB of physical memory. This is used during bootstrapping i386 VMs on - * CPUs without unrestricted guest capability. - */ -static void -push_pt_32(void) -{ - uint32_t ptes[1024], i; - - memset(ptes, 0, sizeof(ptes)); - for (i = 0 ; i < 1024; i++) { - ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i); - } - write_mem(PML3_PAGE, ptes, PAGE_SIZE); -} - -/* - * push_pt_64 - * - * Create an identity-mapped page directory hierarchy mapping the first - * 1GB of physical memory. 
This is used during bootstrapping 64 bit VMs on - * CPUs without unrestricted guest capability. - */ -static void -push_pt_64(void) -{ - uint64_t ptes[512], i; - - /* PDPDE0 - first 1GB */ - memset(ptes, 0, sizeof(ptes)); - ptes[0] = pg_crypt | PG_V | PML3_PAGE; - write_mem(PML4_PAGE, ptes, PAGE_SIZE); - sev_register_encryption(PML4_PAGE, PAGE_SIZE); - - /* PDE0 - first 1GB */ - memset(ptes, 0, sizeof(ptes)); - ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE; - write_mem(PML3_PAGE, ptes, PAGE_SIZE); - sev_register_encryption(PML3_PAGE, PAGE_SIZE); - - /* First 1GB (in 2MB pages) */ - memset(ptes, 0, sizeof(ptes)); - for (i = 0 ; i < 512; i++) { - ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS | - ((2048 * 1024) * i); - } - write_mem(PML2_PAGE, ptes, PAGE_SIZE); - sev_register_encryption(PML2_PAGE, PAGE_SIZE); -} - /* * loadfile_elf * @@ -271,7 +212,7 @@ int loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, unsigned int bootdevice) { - int r, is_i386 = 0; + int r; uint32_t bootargsz; size_t n, stacksize; u_long marks[MARK_MAX]; @@ -286,7 +227,6 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 && hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) { r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL); - is_i386 = 1; } else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 && hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) { r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL); @@ -298,25 +238,15 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, push_gdt(); - if (is_i386) { - push_pt_32(); - /* Reconfigure the default flat-64 register set for 32 bit */ - vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE; - vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; - vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; - } - else { - if (vcp->vcp_sev) { - if (vcp->vcp_poscbit == 0) { - log_warnx("SEV enabled but no C-bit reported"); - return 1; - } - pg_crypt = (1ULL << vcp->vcp_poscbit); - log_debug("%s: 
poscbit %d pg_crypt 0x%016llx", - __func__, vcp->vcp_poscbit, pg_crypt); - } - push_pt_64(); - } + /* + * As both amd64 and i386 kernels are launched in 32 bit + * protected mode with paging disabled reconfigure the default + * flat 64 bit register set. + */ + vrs->vrs_crs[VCPU_REGS_CR3] = 0ULL; + vrs->vrs_crs[VCPU_REGS_CR4] = 0ULL; + vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; + vrs->vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE; if (bootdevice == VMBOOTDEV_NET) { bootmac = &bm;