Index | Thread | Search

From:
Hans-Jörg Höxer <hshoexer@genua.de>
Subject:
Re: vmd(8): Use 32-bit direct kernel launch for both amd64 and i386
To:
<tech@openbsd.org>
Cc:
<hshoexer@yerbouti.franken.de>
Date:
Wed, 19 Nov 2025 12:48:59 +0100

Download raw body.

Thread
Hi,

I'd like to revisit this diff.  Here's the context from before the last
release:

 o I want to use 32-bit (legacy) mode for direct kernel launch for both
   amd64 and i386; this will simplify the SEV-ES related code dealing
   with #VC traps during locore0

 o right now we actually use compatibility mode; this implies that
   exceptions will be raised by 64 bit rules, etc.

 o the problem with my initial diff was that it broke VMX/EPT support
   in vmm(4) by tricking vmm(4) into assuming "restricted" guest mode,
   even when the CPU actually supports unrestricted guests

 o mlarkin removed the support for restricted guests [1]; with this my
   diff works now on both VMX/EPT and SVM/RVI

Tests are welcome :-)
And oks, too.

Take care,
HJ.

[1] https://marc.info/?l=openbsd-cvs&m=175786504306946&w=2

--------------------------------------------------------------------------
diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c
index 5f67953fb50..8925736aece 100644
--- a/usr.sbin/vmd/loadfile_elf.c
+++ b/usr.sbin/vmd/loadfile_elf.c
@@ -110,15 +110,13 @@ union {
 } hdr;
 
 static void setsegment(struct mem_segment_descriptor *, uint32_t,
-    size_t, int, int, int, int, int);
+    size_t, int, int, int, int);
 static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
 static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
 static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
 static uint32_t push_bootargs(bios_memmap_t *, size_t, bios_bootmac_t *);
 static size_t push_stack(uint32_t, uint32_t);
 static void push_gdt(void);
-static void push_pt_32(void);
-static void push_pt_64(void);
 static void marc4random_buf(paddr_t, int);
 static void mbzero(paddr_t, int);
 static void mbcopy(void *, paddr_t, int);
@@ -126,8 +124,6 @@ static void mbcopy(void *, paddr_t, int);
 extern char *__progname;
 extern int vm_id;
 
-uint64_t pg_crypt = 0;
-
 /*
  * setsegment
  *
@@ -148,7 +144,7 @@ uint64_t pg_crypt = 0;
  */
 static void
 setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
-    int type, int dpl, int def32, int gran, int lm)
+    int type, int dpl, int def32, int gran)
 {
 	sd->sd_lolimit = (int)limit;
 	sd->sd_lobase = (int)base;
@@ -157,7 +153,7 @@ setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
 	sd->sd_p = 1;
 	sd->sd_hilimit = (int)limit >> 16;
 	sd->sd_avl = 0;
-	sd->sd_long = lm;
+	sd->sd_long = 0;
 	sd->sd_def32 = def32;
 	sd->sd_gran = gran;
 	sd->sd_hibase = (int)base >> 24;
@@ -185,71 +181,16 @@ push_gdt(void)
 	 * Create three segment descriptors:
 	 *
 	 * GDT[0] : null descriptor. "Created" via memset above.
-	 * GDT[1] (selector @ 0x8): Executable segment (compat mode), for CS
+	 * GDT[1] (selector @ 0x8): Executable segment, for CS
 	 * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
-	 * GDT[3] (selector @ 0x18): Executable segment (long mode), for CS
 	 */
-	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1, 0);
-	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1, 0);
-	setsegment(&sd[3], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 0, 1, 1);
+	setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
+	setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
 
 	write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
 	sev_register_encryption(GDT_PAGE, PAGE_SIZE);
 }
 
-/*
- * push_pt_32
- *
- * Create an identity-mapped page directory hierarchy mapping the first
- * 4GB of physical memory. This is used during bootstrapping i386 VMs on
- * CPUs without unrestricted guest capability.
- */
-static void
-push_pt_32(void)
-{
-	uint32_t ptes[1024], i;
-
-	memset(ptes, 0, sizeof(ptes));
-	for (i = 0 ; i < 1024; i++) {
-		ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((4096 * 1024) * i);
-	}
-	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
-}
-
-/*
- * push_pt_64
- *
- * Create an identity-mapped page directory hierarchy mapping the first
- * 1GB of physical memory. This is used during bootstrapping 64 bit VMs on
- * CPUs without unrestricted guest capability.
- */
-static void
-push_pt_64(void)
-{
-	uint64_t ptes[512], i;
-
-	/* PDPDE0 - first 1GB */
-	memset(ptes, 0, sizeof(ptes));
-	ptes[0] = pg_crypt | PG_V | PML3_PAGE;
-	write_mem(PML4_PAGE, ptes, PAGE_SIZE);
-	sev_register_encryption(PML4_PAGE, PAGE_SIZE);
-
-	/* PDE0 - first 1GB */
-	memset(ptes, 0, sizeof(ptes));
-	ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE;
-	write_mem(PML3_PAGE, ptes, PAGE_SIZE);
-	sev_register_encryption(PML3_PAGE, PAGE_SIZE);
-
-	/* First 1GB (in 2MB pages) */
-	memset(ptes, 0, sizeof(ptes));
-	for (i = 0 ; i < 512; i++) {
-		ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS |
-		    ((2048 * 1024) * i);
-	}
-	write_mem(PML2_PAGE, ptes, PAGE_SIZE);
-	sev_register_encryption(PML2_PAGE, PAGE_SIZE);
-}
-
 /*
  * loadfile_elf
  *
@@ -271,7 +212,7 @@ int
 loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
     unsigned int bootdevice)
 {
-	int r, is_i386 = 0;
+	int r;
 	uint32_t bootargsz;
 	size_t n, stacksize;
 	u_long marks[MARK_MAX];
@@ -286,7 +227,6 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
 	if (memcmp(hdr.elf32.e_ident, ELFMAG, SELFMAG) == 0 &&
 	    hdr.elf32.e_ident[EI_CLASS] == ELFCLASS32) {
 		r = elf32_exec(fp, &hdr.elf32, marks, LOAD_ALL);
-		is_i386 = 1;
 	} else if (memcmp(hdr.elf64.e_ident, ELFMAG, SELFMAG) == 0 &&
 	    hdr.elf64.e_ident[EI_CLASS] == ELFCLASS64) {
 		r = elf64_exec(fp, &hdr.elf64, marks, LOAD_ALL);
@@ -298,25 +238,15 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
 
 	push_gdt();
 
-	if (is_i386) {
-		push_pt_32();
-		/* Reconfigure the default flat-64 register set for 32 bit */
-		vrs->vrs_crs[VCPU_REGS_CR3] = PML3_PAGE;
-		vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
-		vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
-	}
-	else {
-		if (vcp->vcp_sev) {
-			if (vcp->vcp_poscbit == 0) {
-				log_warnx("SEV enabled but no C-bit reported");
-				return 1;
-			}
-			pg_crypt = (1ULL << vcp->vcp_poscbit);
-			log_debug("%s: poscbit %d pg_crypt 0x%016llx",
-			    __func__, vcp->vcp_poscbit, pg_crypt);
-		}
-		push_pt_64();
-	}
+	/*
+	 * As both amd64 and i386 kernels are launched in 32 bit
+	 * protected mode with paging disabled reconfigure the default
+	 * flat 64 bit register set.
+	 */
+	vrs->vrs_crs[VCPU_REGS_CR3] = 0ULL;
+	vrs->vrs_crs[VCPU_REGS_CR4] = 0ULL;
+	vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
+	vrs->vrs_crs[VCPU_REGS_CR0] = CR0_ET | CR0_PE;
 
 	if (bootdevice == VMBOOTDEV_NET) {
 		bootmac = &bm;