
From: Mike Larkin <mlarkin@nested.page>
Subject: Re: [PATCH 1/2] efiboot/amd64: Switch to own pagetable before booting
To: Vladimir Serbinenko <phcoder@gmail.com>
Cc: tech@openbsd.org
Date: Mon, 19 May 2025 01:42:28 -0700

On Sat, Mar 01, 2025 at 04:21:06PM +0300, Vladimir Serbinenko wrote:
> On my system the page table is right where the OpenBSD kernel is
> usually placed, so we end up overwriting it no matter what. To fix
> the issue, create our own temporary page table at an address right
> after the loaded kernel. This page table ends up discarded when we
> switch to 32-bit mode.
>
> Roughly based on my GRUB patch for the same problem

I think this probably fixes a lot of the machines that hang at the bootloader.

Will take a more in-depth look at this. Thanks!
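
In case it helps anyone reviewing the math, below is a rough,
host-runnable sketch of the size arithmetic change_pagetable() now
does: the highest address in the BIOS memory map decides how many 2 MB
PDEs are needed, which in turn sizes the PD and PDPT levels. The flag
values and the 8 GB example size are stand-ins rather than the in-tree
pmap definitions, and it builds the tables in malloc'd memory and just
prints the totals instead of loading %cr3.

/*
 * Host-runnable sketch of the table sizing done by the patch above.
 * The flag values below are illustrative stand-ins (not the pmap
 * headers the bootloader uses) and the tables are built in malloc'd
 * memory just to show the arithmetic; nothing is loaded into %cr3.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PG_V	0x001ULL	/* present */
#define PG_RW	0x002ULL	/* writable */
#define PG_u	0x004ULL	/* user */
#define PG_PS	0x080ULL	/* 2 MB superpage at the PD level */

int
main(void)
{
	uint64_t max_ram = 8ULL << 30;	/* pretend the memory map tops out at 8 GB */
	uint64_t nentries = (max_ram + 0x1fffff) >> 21;	/* 2 MB PDEs needed */
	uint64_t npt2pages = (nentries + 0x1ff) >> 9;	/* PD pages, 512 PDEs each */
	uint64_t npt3pages = (npt2pages + 0x1ff) >> 9;	/* PDPT pages */
	uint64_t *pt4, *pt3, *pt2;
	uint64_t i;

	/* one PML4 page, then the PDPT pages, then the PD pages, contiguous */
	pt4 = calloc(512 * (1 + npt3pages + npt2pages), sizeof(uint64_t));
	if (pt4 == NULL)
		return 1;
	pt3 = pt4 + 512;
	pt2 = pt3 + (npt3pages << 9);

	for (i = 0; i < npt3pages; i++)
		pt4[i] = ((uint64_t)(uintptr_t)pt3 + (i << 12)) | PG_u | PG_RW | PG_V;
	for (i = 0; i < npt2pages; i++)
		pt3[i] = ((uint64_t)(uintptr_t)pt2 + (i << 12)) | PG_u | PG_RW | PG_V;
	for (i = 0; i < (npt2pages << 9); i++)
		pt2[i] = (i << 21) | PG_PS | PG_u | PG_RW | PG_V;

	printf("%" PRIu64 " PDEs -> %" PRIu64 " PD page(s), %" PRIu64
	    " PDPT page(s), %" PRIu64 " table pages total\n",
	    nentries, npt2pages, npt3pages, 1 + npt3pages + npt2pages);

	/* in the bootloader, pt4 is what would go into %cr3 at this point */
	free(pt4);
	return 0;
}

For an 8 GB box that works out to 4096 PDEs, eight PD pages and one
PDPT page, i.e. ten 4k pages of tables sitting just past the loaded
kernel.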

-ml


> ---
>  sys/arch/amd64/stand/efiboot/exec_i386.c | 78 ++++++++++++------------
>  1 file changed, 40 insertions(+), 38 deletions(-)
>
> diff --git a/sys/arch/amd64/stand/efiboot/exec_i386.c b/sys/arch/amd64/stand/efiboot/exec_i386.c
> index 75451897a..7b1e26351 100644
> --- a/sys/arch/amd64/stand/efiboot/exec_i386.c
> +++ b/sys/arch/amd64/stand/efiboot/exec_i386.c
> @@ -53,11 +53,13 @@
>
>  extern EFI_BOOT_SERVICES	*BS;
>
> +extern bios_memmap_t bios_memmap[];	/* This is easier */
> +
>  typedef void (*startfuncp)(int, int, int, int, int, int, int, int)
>      __attribute__ ((noreturn));
>
>  void ucode_load(void);
> -void protect_writeable(uint64_t, size_t);
> +void change_pagetable(uint64_t);
>  extern struct cmd_state cmd;
>
>  char *bootmac = NULL;
> @@ -145,8 +147,7 @@ run_loadfile(uint64_t *marks, int howto)
>  	 * ExitBootServices().
>  	 */
>  #ifdef __amd64__
> -	protect_writeable(marks[MARK_START] + delta,
> -	    marks[MARK_END] - marks[MARK_START]);
> +	change_pagetable(marks[MARK_END]);
>  #endif
>  	memmove((void *)marks[MARK_START] + delta, (void *)marks[MARK_START],
>  	    marks[MARK_END] - marks[MARK_START]);
> @@ -266,13 +267,24 @@ detect_sev(void)
>  	return 0;
>  }
>
> +static uint64_t
> +find_max_size (void)
> +{
> +	uint64_t max_ram_size = 1ULL << 32;
> +	register bios_memmap_t *p;
> +
> +	for (p = bios_memmap; p->type != BIOS_MAP_END; p++) {
> +		if (max_ram_size < p->addr + p->size)
> +			max_ram_size = p->addr + p->size;
> +	}
> +
> +	return max_ram_size;
> +}
> +
>  void
> -protect_writeable(uint64_t addr, size_t len)
> +change_pagetable(uint64_t addr)
>  {
> -	uint64_t end = addr + len;
> -	uint64_t *cr3, *p;
> -	uint64_t cr0;
> -	size_t idx;
> +	uint64_t cr0, i;
>
>  	if (detect_sev() == 0)
>  		return;
> @@ -280,36 +292,26 @@ protect_writeable(uint64_t addr, size_t len)
>  	__asm volatile("movq %%cr0, %0;" : "=r"(cr0) : :);
>  	if ((cr0 & CR0_PG) == 0)
>  		return;
> -	__asm volatile("movq %%cr3, %0;" : "=r"(cr3) : :);
> -
> -	for (addr &= ~(uint64_t)PAGE_MASK; addr < end; addr += PAGE_SIZE) {
> -		idx = (addr & L4_MASK) >> L4_SHIFT;
> -		if ((cr3[idx] & PG_RW) == 0)
> -			cr3[idx] |= PG_RW;
> -		if (cr3[idx] & PG_PS)
> -			continue;
> -		p = (uint64_t *)(cr3[idx] & PG_FRAME);
> -
> -		idx = (addr & L3_MASK) >> L3_SHIFT;
> -		if ((p[idx] & PG_RW) == 0)
> -			p[idx] |= PG_RW;
> -		if (p[idx] & PG_PS)
> -			continue;
> -		p = (uint64_t *)(p[idx] & PG_FRAME);
> -
> -		idx = (addr & L2_MASK) >> L2_SHIFT;
> -		if ((p[idx] & PG_RW) == 0)
> -			p[idx] |= PG_RW;
> -		if (p[idx] & PG_PS)
> -			continue;
> -		p = (uint64_t *)(p[idx] & PG_FRAME);
> -
> -		idx = (addr & L1_MASK) >> L1_SHIFT;
> -		if ((p[idx] & PG_RW) == 0)
> -			p[idx] |= PG_RW;
> -	}
>
> -	/* tlb flush */
> -	__asm volatile("movq %0,%%cr3" : : "r"(cr3) :);
> +	addr = (addr + PAGE_SIZE) & ~(uint64_t)PAGE_MASK;
> +
> +	uint64_t nentries = (find_max_size () + 0x1fffff) >> 21;
> +	uint64_t npt2pages = (nentries + 0x1ff) >> 9;
> +	uint64_t npt3pages = (npt2pages + 0x1ff) >> 9;
> +	uint64_t *pt4 = (uint64_t *) addr;
> +	uint64_t *pt3 = pt4 + 0x200;
> +	uint64_t *pt2 = pt3 + (npt3pages << 9);
> +
> +	for (i = 0; i < npt3pages; i++)
> +		pt4[i] = ((uint64_t)pt3 + (i << 12)) | PG_u | PG_RW | PG_V;
> +
> +	for (i = 0; i < npt2pages; i++)
> +		pt3[i] = ((uint64_t)pt2 + (i << 12)) | PG_u | PG_RW | PG_V;
> +
> +	for (i = 0; i < (npt2pages << 9); i++)
> +		pt2[i] = (i << 21) | PG_PS | PG_u | PG_RW | PG_V;
> +
> +	/* change page table */
> +	__asm volatile("movq %0,%%cr3" : : "r"(pt4) :);
>  }
>  #endif
> --
> 2.39.5
>