Index | Thread | Search

From:
Stefan Fritsch <sf@sfritsch.de>
Subject:
SEV: Modify IPI handling to work with the #VC trap handler
To:
tech@openbsd.org
Cc:
Mike Larkin <mlarkin@nested.page>
Date:
Fri, 16 Jan 2026 10:56:48 +0100

Download raw body.

Thread
Hi,

this is the next commit on the way to make SEV-ES work with MP.

With SEV-ES, the wrmsr instruction used to access x2apic registers traps
into the #VC trap handler. The trap handler will need the kernel GS_BASE
once we move the ghcb address into a per-cpu var for MP. So, if we come
from userland, we need to do swapgs. The fast IPI handlers do not
perform swapgs, so we add a special variant of the x2apic_eoi function
that also does swapgs.

Parts from Sebastian Sturm

ok?

Cheers,
Stefan

diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index b19dcb349e3..b99e4ab80d7 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
 };
 
 extern int x2apic_eoi;
+extern int x2apic_eoi_swapgs;
 int x2apic_enabled = 0;
 
 u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
 #endif
 		x2apic_enabled = 1;
 		codepatch_call(CPTAG_EOI, &x2apic_eoi);
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
+		else
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
 
 		va = (vaddr_t)&local_apic;
 	} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
 		pte = kvtopte(va);
 		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
 		invlpg(va);
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			panic("xAPIC mode not implemented for SEV-ES");
 	}
 
 	/*
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 9de0ee97f08..f78d65f66ae 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
 	lfence
 END(x2apic_eoi)
 
+/*
+ * x2APIC EOI for the fast IPI handlers under SEV-ES: wrmsr traps into the
+ * #VC handler, which needs the kernel GS_BASE, and the fast IPI handlers
+ * do not swapgs themselves. So swapgs around the wrmsr when the interrupt
+ * came from userland and swap back afterwards. The user/kernel check reads
+ * a fixed offset from %rsp, so this MUST be called before %rsp is modified.
+ */
+KUENTRY(x2apic_eoi_swapgs)
+	testb	$SEL_RPL,16(%rsp)	/* presumably saved %cs: 0=ret addr, 8=%rip */
+	jz	1f			/* RPL bits clear: skip the swapgs */
+	swapgs				/* switch to the kernel GS_BASE for #VC */
+	FENCE_SWAPGS_MIS_TAKEN
+1:
+	pushq	%rax			/* wrmsr operands live in %eax/%ecx/%edx */
+	pushq	%rcx
+	pushq	%rdx
+	mov     $MSR_X2APIC_EOI,%ecx	/* MSR index */
+	mov     $0,%eax			/* value written is %edx:%eax = 0 */
+	mov     $0,%edx
+	wrmsr				/* traps into #VC with SEV-ES */
+	popq	%rdx
+	popq	%rcx
+	popq	%rax
+
+	testb	$SEL_RPL,16(%rsp)	/* pushes/pops balanced: same slot as above */
+	jz	2f
+	swapgs				/* undo the swapgs done before the wrmsr */
+	FENCE_SWAPGS_MIS_TAKEN
+2:
+	retq
+	lfence				/* same retq/lfence tail as x2apic_eoi */
+END(x2apic_eoi_swapgs)
+
 #if NLAPIC > 0
 #ifdef MULTIPROCESSOR
 KIDTVEC(recurse_lapic_ipi)
@@ -630,9 +663,9 @@ END(Xresume_lapic_ipi)
  */
 /* invalidate the entire TLB, no PCIDs version */
 IDTVEC(ipi_invltlb)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	%cr3, %rax
 	movq	%rax, %cr3
@@ -652,11 +685,11 @@ END(Xipi_invltlb)
 #if NVMM > 0
 /* Invalidate VMX EPT */
 IDTVEC(ipi_invept)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	$ept_shoot_vid, %rax
 	movq	ept_shoot_mode, %rdx
 	invept	(%rax), %rdx
@@ -677,9 +710,9 @@ END(Xipi_invept)
 
 /* invalidate a single page, no PCIDs version */
 IDTVEC(ipi_invlpg)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	tlb_shoot_addr1, %rax
 	invlpg	(%rax)
@@ -698,11 +731,11 @@ END(Xipi_invlpg)
 
 /* invalidate a range of pages, no PCIDs version */
 IDTVEC(ipi_invlrange)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	tlb_shoot_addr1, %rax
 	movq	tlb_shoot_addr2, %rdx
 1:	invlpg	(%rax)
@@ -727,9 +760,9 @@ END(Xipi_invlrange)
  * Invalidate the userspace PCIDs.
  */
 IDTVEC(ipi_invltlb_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* set the type */
 	movl	$INVPCID_PCID,%eax
@@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid)
  * while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlpg_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
@@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid)
  * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlrange_pcid)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 	pushq	%rcx
 
-	ioapic_asm_ack()
-
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
 
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 2ccb638a8e8..6b6bfee62e1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -70,6 +70,7 @@ void codepatch_disable(void);
 #define CPTAG_RETPOLINE_R11		15
 #define CPTAG_RETPOLINE_R13		16
 #define CPTAG_IBPB_NOP			17
+#define CPTAG_EOI_FAST_IPI		18
 
 /*
  * stac/clac SMAP instructions have lfence like semantics.  Let's
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..2761d4f045f 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,22 @@
 
 #include <machine/codepatch.h>
 
-#define ioapic_asm_ack(num) 					 \
+/*
+ * EOI for fast IPI handlers, which do not swapgs on entry from userland.
+ * In x2APIC mode it is patched to call x2apic_eoi, or x2apic_eoi_swapgs
+ * under SEV-ES, so it must be invoked before %rsp is modified.
+ */
+#define ioapic_asm_ack_fast_ipi(num)				\
+	CODEPATCH_START						;\
+	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
+	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * Writes the LAPIC EOI register; patched to call x2apic_eoi in x2APIC mode.
+ * Assumes that swapgs has already been called (e.g. by INTRENTRY).
+ */
+#define ioapic_asm_ack(num)					 \
 	CODEPATCH_START						;\
 	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
 	CODEPATCH_END(CPTAG_EOI)