Index | Thread | Search

From:
Mike Larkin <mlarkin@nested.page>
Subject:
Re: SEV: Modify IPI handling to work with the #VC trap handler
To:
Stefan Fritsch <sf@sfritsch.de>
Cc:
tech@openbsd.org
Date:
Fri, 16 Jan 2026 19:21:02 -0800

Download raw body.

Thread
On Fri, Jan 16, 2026 at 10:56:48AM +0100, Stefan Fritsch wrote:
> Hi,
>
> this is the next commit on the way to make SEV-ES work with MP.
>
> With SEV-ES the wrmsr instruction used to access x2apic registers traps
> into the #VC trap handler. The trap handler will need the kernel GS_BASE
> once we move the ghcb address into a per-cpu var for MP. So, if we come
> from the userland, we need to do swapgs. The fast IPI handlers do not
> perform swapgs, so we add a special variant of the x2apic_eoi function
> that also does swapgs.
>
> Parts from Sebastian Sturm
>
> ok?
>
> Cheers,
> Stefan
>

I think we should wait and make sure the recent changes are solid. There
has been a fair amount of churn in this area (amd64 IPIs) recently and I don't
want to be piling on a bunch of other stuff too quickly.

See below for other comments.

-ml

> diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> index b19dcb349e3..b99e4ab80d7 100644
> --- a/sys/arch/amd64/amd64/lapic.c
> +++ b/sys/arch/amd64/amd64/lapic.c
> @@ -99,6 +99,7 @@ struct pic local_pic = {
>  };
>
>  extern int x2apic_eoi;
> +extern int x2apic_eoi_swapgs;
>  int x2apic_enabled = 0;
>
>  u_int32_t x2apic_readreg(int reg);
> @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
>  #endif
>  		x2apic_enabled = 1;
>  		codepatch_call(CPTAG_EOI, &x2apic_eoi);
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);

x2apic_eoi_swapgs should be renamed to something that says it is "for SEV",
since the current name doesn't convey that this variant is for SEV only.

> +		else
> +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
>
>  		va = (vaddr_t)&local_apic;
>  	} else {
> @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
>  		pte = kvtopte(va);
>  		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
>  		invlpg(va);
> +
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			panic("xAPIC mode not implemented for SEV-ES");

Does this affect OpenBSD SEV-ES guests?

>  	}
>
>  	/*
> diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> index 9de0ee97f08..f78d65f66ae 100644
> --- a/sys/arch/amd64/amd64/vector.S
> +++ b/sys/arch/amd64/amd64/vector.S
> @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
>  	lfence
>  END(x2apic_eoi)
>
> +/*
> + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> + * needs the kernel GS_BASE. So if we come from the userland, we need to
> + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> + * to do it here. In order to detect whether we come from user or kernel
> + * land, this function MUST be called before %rsp is modified.
> + */
> +KUENTRY(x2apic_eoi_swapgs)
> +	testb	$SEL_RPL,16(%rsp)
> +	jz	1f
> +	swapgs
> +	FENCE_SWAPGS_MIS_TAKEN
> +1:

Please get guenther@'s ok here. There be dragons in this area (%gs
modifications, interrupt handler concerns, etc.). If you end up in places
with the wrong %gs, you're hooped.

> +	pushq	%rax
> +	pushq	%rcx
> +	pushq	%rdx
> +	mov     $MSR_X2APIC_EOI,%ecx
> +	mov     $0,%eax
> +	mov     $0,%edx

xorl %eax, %eax
xorl %edx, %edx

for consistency with other areas

> +	wrmsr
> +	popq	%rdx
> +	popq	%rcx
> +	popq	%rax
> +
> +	testb	$SEL_RPL,16(%rsp)
> +	jz	2f
> +	swapgs
> +	FENCE_SWAPGS_MIS_TAKEN
> +2:
> +	retq
> +	lfence
> +END(x2apic_eoi_swapgs)
> +
>  #if NLAPIC > 0
>  #ifdef MULTIPROCESSOR
>  KIDTVEC(recurse_lapic_ipi)
> @@ -630,9 +663,9 @@ END(Xresume_lapic_ipi)
>   */
>  /* invalidate the entire TLB, no PCIDs version */
>  IDTVEC(ipi_invltlb)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	movq	%cr3, %rax
>  	movq	%rax, %cr3
> @@ -652,11 +685,11 @@ END(Xipi_invltlb)
>  #if NVMM > 0
>  /* Invalidate VMX EPT */
>  IDTVEC(ipi_invept)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>
> -	ioapic_asm_ack()
> -
>  	movq	$ept_shoot_vid, %rax
>  	movq	ept_shoot_mode, %rdx
>  	invept	(%rax), %rdx
> @@ -677,9 +710,9 @@ END(Xipi_invept)
>
>  /* invalidate a single page, no PCIDs version */
>  IDTVEC(ipi_invlpg)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	movq	tlb_shoot_addr1, %rax
>  	invlpg	(%rax)
> @@ -698,11 +731,11 @@ END(Xipi_invlpg)
>
>  /* invalidate a range of pages, no PCIDs version */
>  IDTVEC(ipi_invlrange)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>
> -	ioapic_asm_ack()
> -
>  	movq	tlb_shoot_addr1, %rax
>  	movq	tlb_shoot_addr2, %rdx
>  1:	invlpg	(%rax)
> @@ -727,9 +760,9 @@ END(Xipi_invlrange)
>   * Invalidate the userspace PCIDs.
>   */
>  IDTVEC(ipi_invltlb_pcid)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	/* set the type */
>  	movl	$INVPCID_PCID,%eax
> @@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid)
>   * while userspace VAs are present in PCIDs 1 and 2.
>   */
>  IDTVEC(ipi_invlpg_pcid)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	/* space for the INVPCID descriptor */
>  	subq	$16,%rsp
> @@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid)
>   * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
>   */
>  IDTVEC(ipi_invlrange_pcid)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>  	pushq	%rcx
>
> -	ioapic_asm_ack()
> -
>  	/* space for the INVPCID descriptor */
>  	subq	$16,%rsp
>
> diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
> index 2ccb638a8e8..6b6bfee62e1 100644
> --- a/sys/arch/amd64/include/codepatch.h
> +++ b/sys/arch/amd64/include/codepatch.h
> @@ -70,6 +70,7 @@ void codepatch_disable(void);
>  #define CPTAG_RETPOLINE_R11		15
>  #define CPTAG_RETPOLINE_R13		16
>  #define CPTAG_IBPB_NOP			17
> +#define CPTAG_EOI_FAST_IPI		18
>
>  /*
>   * stac/clac SMAP instructions have lfence like semantics.  Let's
> diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
> index 99b22923499..2761d4f045f 100644
> --- a/sys/arch/amd64/include/i82093reg.h
> +++ b/sys/arch/amd64/include/i82093reg.h
> @@ -114,7 +114,22 @@
>
>  #include <machine/codepatch.h>
>
> -#define ioapic_asm_ack(num) 					 \
> +/*
> + * This macro must also work if swapgs has not been called on entry
> + * from userland. To make this work, the macro must be called before %rsp
> + * is modified, see x2apic_eoi_swapgs.
> + */
> +#define ioapic_asm_ack_fast_ipi(num)				\
> +	CODEPATCH_START						;\
> +	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
> +	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
> +
> +
> +/*
> + * This macro assumes that swapgs has already been called (e.g. by
> + * INTRENTRY).
> + */
> +#define ioapic_asm_ack(num)					 \
>  	CODEPATCH_START						;\
>  	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
>  	CODEPATCH_END(CPTAG_EOI)
>