Index | Thread | Search

From:
Stefan Fritsch <sf@sfritsch.de>
Subject:
Re: SEV: Modifiy IPI handling to work with the #VC trap handler
To:
Mike Larkin <mlarkin@nested.page>
Cc:
tech@openbsd.org, guenther@openbsd.org
Date:
Tue, 27 Jan 2026 11:23:55 +0100

Download raw body.

Thread
added guenther@ to cc because of the swapgs part in vector.S

On Fri, 16 Jan 2026, Mike Larkin wrote:

> On Fri, Jan 16, 2026 at 10:56:48AM +0100, Stefan Fritsch wrote:
> > Hi,
> >
> > this is the next commit on the way to make SEV-ES work with MP.
> >
> > With SEV-ES the wrmsr instruction used to access x2apic registers traps
> > into the #VC trap handler. The trap handler will need the kernel GS_BASE
> > once we move the ghcb address into a per-cpu var for MP. So, if we come
> > from the userland, we need to do swapgs. The fast IPI handlers do not
> > perform swapgs, so we add a special variant of the x2apic_eoi function
> > that also does swapgs.
> >
> > Parts from Sebastian Sturm
> >
> > ok?
> 
> I think we should wait and make sure the recent changes are solid. There
> has been a fair amount of churn in this area (amd64 IPIs) recently and I don't
> want to be piling on a bunch of other stuff too quickly.

Sure, no problem. Maybe things will have settled down enough by the time 
of the hackathon. New diff with your comments fixed at the bottom of the 
mail.

> 
> Other comments see below.
> 
> -ml
> 
> > diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> > index b19dcb349e3..b99e4ab80d7 100644
> > --- a/sys/arch/amd64/amd64/lapic.c
> > +++ b/sys/arch/amd64/amd64/lapic.c
> > @@ -99,6 +99,7 @@ struct pic local_pic = {
> >  };
> >
> >  extern int x2apic_eoi;
> > +extern int x2apic_eoi_swapgs;
> >  int x2apic_enabled = 0;
> >
> >  u_int32_t x2apic_readreg(int reg);
> > @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
> >  #endif
> >  		x2apic_enabled = 1;
> >  		codepatch_call(CPTAG_EOI, &x2apic_eoi);
> > +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> > +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
> 
> x2apic_eoi_swapgs should be named something "for SEV" since just looking at that
> name doesn't convey that this is "for SEV only".

I have renamed it to x2apic_eoi_sev_es

> 
> > +		else
> > +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
> >
> >  		va = (vaddr_t)&local_apic;
> >  	} else {
> > @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
> >  		pte = kvtopte(va);
> >  		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
> >  		invlpg(va);
> > +
> > +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> > +			panic("xAPIC mode not implemented for SEV-ES");
> 
> does this affect openbsd SEV-ES guests?

This already does not work, as we don't support MMIO in the #VC trap 
handler and xapic mode uses direct memory access without busdma API. This 
just adds a better error message. To make xapic work, we would need to 
change lapic.c to use the busdma API, like kettenis did for the ioapic. 
But at the moment, I don't think this is worth the effort. All hypervisors 
we currently support for SEV-ES do support x2apic.

> 
> >  	}
> >
> >  	/*
> > diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> > index 9de0ee97f08..f78d65f66ae 100644
> > --- a/sys/arch/amd64/amd64/vector.S
> > +++ b/sys/arch/amd64/amd64/vector.S
> > @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
> >  	lfence
> >  END(x2apic_eoi)
> >
> > +/*
> > + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> > + * needs the kernel GS_BASE. So if we come from the userland, we need to
> > + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> > + * to do it here. In order to detect whether we come from user or kernel
> > + * land, this function MUST be called before %rsp is modified.
> > + */
> > +KUENTRY(x2apic_eoi_swapgs)
> > +	testb	$SEL_RPL,16(%rsp)
> > +	jz	1f
> > +	swapgs
> > +	FENCE_SWAPGS_MIS_TAKEN
> > +1:
> 
> please get guenther@'s ok here. There be dragons in this area (%gs
> modifications, interrupt handler concerns, etc). If you end up in places
> with wrong %gs you're hooped.
> 
> > +	pushq	%rax
> > +	pushq	%rcx
> > +	pushq	%rdx
> > +	mov     $MSR_X2APIC_EOI,%ecx
> > +	mov     $0,%eax
> > +	mov     $0,%edx
> 
> xorl %eax, %eax
> xorl %edx, %edx
> 
> for consistency with other areas

fixed

> 
> > +	wrmsr
> > +	popq	%rdx
> > +	popq	%rcx
> > +	popq	%rax
> > +
> > +	testb	$SEL_RPL,16(%rsp)
> > +	jz	2f
> > +	swapgs
> > +	FENCE_SWAPGS_MIS_TAKEN
> > +2:
> > +	retq
> > +	lfence
> > +END(x2apic_eoi_swapgs)
> > +
...

Cheers,
Stefan



diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index b19dcb349e3..00b4a1ed288 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
 };
 
 extern int x2apic_eoi;
+extern int x2apic_eoi_sev_es;
 int x2apic_enabled = 0;
 
 u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
 #endif
 		x2apic_enabled = 1;
 		codepatch_call(CPTAG_EOI, &x2apic_eoi);
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_sev_es);
+		else
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
 
 		va = (vaddr_t)&local_apic;
 	} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
 		pte = kvtopte(va);
 		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
 		invlpg(va);
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			panic("xAPIC mode not implemented for SEV-ES");
 	}
 
 	/*
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 9de0ee97f08..a81b840b34c 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
 	lfence
 END(x2apic_eoi)
 
+/*
+ * With SEV-ES the wrmsr instruction traps into the #VC handler which
+ * needs the kernel GS_BASE. So if we come from the userland, we need to
+ * do swapgs. The fast IPI handler does not perform swapgs, so we need
+ * to do it here. In order to detect whether we come from user or kernel
+ * land, this function MUST be called before %rsp is modified.
+ */
+KUENTRY(x2apic_eoi_sev_es)
+	testb	$SEL_RPL,16(%rsp)
+	jz	1f
+	swapgs
+	FENCE_SWAPGS_MIS_TAKEN
+1:
+	pushq	%rax
+	pushq	%rcx
+	pushq	%rdx
+	mov     $MSR_X2APIC_EOI,%ecx
+	xorl    %eax,%eax
+	xorl    %edx,%edx
+	wrmsr
+	popq	%rdx
+	popq	%rcx
+	popq	%rax
+
+	testb	$SEL_RPL,16(%rsp)
+	jz	2f
+	swapgs
+	FENCE_SWAPGS_MIS_TAKEN
+2:
+	retq
+	lfence
+END(x2apic_eoi_sev_es)
+
 #if NLAPIC > 0
 #ifdef MULTIPROCESSOR
 KIDTVEC(recurse_lapic_ipi)
@@ -630,9 +663,9 @@ END(Xresume_lapic_ipi)
  */
 /* invalidate the entire TLB, no PCIDs version */
 IDTVEC(ipi_invltlb)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	%cr3, %rax
 	movq	%rax, %cr3
@@ -652,11 +685,11 @@ END(Xipi_invltlb)
 #if NVMM > 0
 /* Invalidate VMX EPT */
 IDTVEC(ipi_invept)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	$ept_shoot_vid, %rax
 	movq	ept_shoot_mode, %rdx
 	invept	(%rax), %rdx
@@ -677,9 +710,9 @@ END(Xipi_invept)
 
 /* invalidate a single page, no PCIDs version */
 IDTVEC(ipi_invlpg)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	tlb_shoot_addr1, %rax
 	invlpg	(%rax)
@@ -698,11 +731,11 @@ END(Xipi_invlpg)
 
 /* invalidate a range of pages, no PCIDs version */
 IDTVEC(ipi_invlrange)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	tlb_shoot_addr1, %rax
 	movq	tlb_shoot_addr2, %rdx
 1:	invlpg	(%rax)
@@ -727,9 +760,9 @@ END(Xipi_invlrange)
  * Invalidate the userspace PCIDs.
  */
 IDTVEC(ipi_invltlb_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* set the type */
 	movl	$INVPCID_PCID,%eax
@@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid)
  * while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlpg_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
@@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid)
  * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlrange_pcid)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 	pushq	%rcx
 
-	ioapic_asm_ack()
-
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
 
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 2ccb638a8e8..6b6bfee62e1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -70,6 +70,7 @@ void codepatch_disable(void);
 #define CPTAG_RETPOLINE_R11		15
 #define CPTAG_RETPOLINE_R13		16
 #define CPTAG_IBPB_NOP			17
+#define CPTAG_EOI_FAST_IPI		18
 
 /*
  * stac/clac SMAP instructions have lfence like semantics.  Let's
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..3288176fb22 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,21 @@
 
 #include <machine/codepatch.h>
 
-#define ioapic_asm_ack(num) 					 \
+/*
+ * This macro must also work if swapgs has not been called on entry
+ * from user land.
+ */
+#define ioapic_asm_ack_fast_ipi(num)				\
+	CODEPATCH_START						;\
+	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
+	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * This macro assumes that swapgs has already been called (e.g. by
+ * INTRENTRY).
+ */
+#define ioapic_asm_ack(num)					 \
 	CODEPATCH_START						;\
 	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
 	CODEPATCH_END(CPTAG_EOI)