From: Vladimir Serbinenko Subject: [PATCH 1/2] efiboot/amd64: Switch to own pagetable before booting To: tech@openbsd.org Cc: Vladimir Serbinenko Date: Sat, 1 Mar 2025 16:21:06 +0300 On my system pagetable is right where usual place for OpenBSD kernel is. So we end up overwriting it no matter what. To fix the issue, create our own temporary table at a address right after loaded kernel. This page table ends up discarded when we switch to 32-bit mode. Roughly based on my GRUB patch for the same problem --- sys/arch/amd64/stand/efiboot/exec_i386.c | 78 ++++++++++++------------ 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/sys/arch/amd64/stand/efiboot/exec_i386.c b/sys/arch/amd64/stand/efiboot/exec_i386.c index 75451897a..7b1e26351 100644 --- a/sys/arch/amd64/stand/efiboot/exec_i386.c +++ b/sys/arch/amd64/stand/efiboot/exec_i386.c @@ -53,11 +53,13 @@ extern EFI_BOOT_SERVICES *BS; +extern bios_memmap_t bios_memmap[]; /* This is easier */ + typedef void (*startfuncp)(int, int, int, int, int, int, int, int) __attribute__ ((noreturn)); void ucode_load(void); -void protect_writeable(uint64_t, size_t); +void change_pagetable(uint64_t); extern struct cmd_state cmd; char *bootmac = NULL; @@ -145,8 +147,7 @@ run_loadfile(uint64_t *marks, int howto) * ExitBootServices(). */ #ifdef __amd64__ - protect_writeable(marks[MARK_START] + delta, - marks[MARK_END] - marks[MARK_START]); + change_pagetable(marks[MARK_END]); #endif memmove((void *)marks[MARK_START] + delta, (void *)marks[MARK_START], marks[MARK_END] - marks[MARK_START]); @@ -266,13 +267,24 @@ detect_sev(void) return 0; } +static uint64_t +find_max_size (void) +{ + uint64_t max_ram_size = 1ULL << 32; + register bios_memmap_t *p; + + for (p = bios_memmap; p->type != BIOS_MAP_END; p++) { + if (max_ram_size < p->addr + p->size) + max_ram_size = p->addr + p->size; + } + + return max_ram_size; +} + void -protect_writeable(uint64_t addr, size_t len) +change_pagetable(uint64_t addr) { - uint64_t end = addr + len; - uint64_t *cr3, *p; - uint64_t cr0; - size_t idx; + uint64_t cr0, i; if (detect_sev() == 0) return; @@ -280,36 +292,26 @@ protect_writeable(uint64_t addr, size_t len) __asm volatile("movq %%cr0, %0;" : "=r"(cr0) : :); if ((cr0 & CR0_PG) == 0) return; - __asm volatile("movq %%cr3, %0;" : "=r"(cr3) : :); - - for (addr &= ~(uint64_t)PAGE_MASK; addr < end; addr += PAGE_SIZE) { - idx = (addr & L4_MASK) >> L4_SHIFT; - if ((cr3[idx] & PG_RW) == 0) - cr3[idx] |= PG_RW; - if (cr3[idx] & PG_PS) - continue; - p = (uint64_t *)(cr3[idx] & PG_FRAME); - - idx = (addr & L3_MASK) >> L3_SHIFT; - if ((p[idx] & PG_RW) == 0) - p[idx] |= PG_RW; - if (p[idx] & PG_PS) - continue; - p = (uint64_t *)(p[idx] & PG_FRAME); - - idx = (addr & L2_MASK) >> L2_SHIFT; - if ((p[idx] & PG_RW) == 0) - p[idx] |= PG_RW; - if (p[idx] & PG_PS) - continue; - p = (uint64_t *)(p[idx] & PG_FRAME); - - idx = (addr & L1_MASK) >> L1_SHIFT; - if ((p[idx] & PG_RW) == 0) - p[idx] |= PG_RW; - } - /* tlb flush */ - __asm volatile("movq %0,%%cr3" : : "r"(cr3) :); + addr = (addr + PAGE_SIZE) & ~(uint64_t)PAGE_MASK; + + uint64_t nentries = (find_max_size () + 0x1fffff) >> 21; + uint64_t npt2pages = (nentries + 0x1ff) >> 9; + uint64_t npt3pages = (npt2pages + 0x1ff) >> 9; + uint64_t *pt4 = (uint64_t *) addr; + uint64_t *pt3 = pt4 + 0x200; + uint64_t *pt2 = pt3 + (npt3pages << 9); + + for (i = 0; i < npt3pages; i++) + pt4[i] = ((uint64_t)pt3 + (i << 12)) | PG_u | PG_RW | PG_V; + + for (i = 0; i < npt2pages; i++) + pt3[i] = ((uint64_t)pt2 + (i << 12)) | PG_u | PG_RW | PG_V; + + for (i = 0; i < (npt2pages << 9); i++) + pt2[i] = (i << 21) | PG_PS | PG_u | PG_RW | PG_V; + + /* change page table */ + __asm volatile("movq %0,%%cr3" : : "r"(pt4) :); } #endif -- 2.39.5