From: Stefan Fritsch Subject: SEV: Modify IPI handling to work with the #VC trap handler To: tech@openbsd.org Cc: Mike Larkin Date: Fri, 16 Jan 2026 10:56:48 +0100 Hi, this is the next commit on the way to make SEV-ES work with MP. With SEV-ES the wrmsr instruction used to access x2apic registers traps into the #VC trap handler. The trap handler will need the kernel GS_BASE once we move the ghcb address into a per-cpu var for MP. So, if we come from the userland, we need to do swapgs. The fast IPI handlers do not perform swapgs, so we add a special variant of the x2apic_eoi function that also does swapgs. Parts from Sebastian Sturm ok? Cheers, Stefan diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c index b19dcb349e3..b99e4ab80d7 100644 --- a/sys/arch/amd64/amd64/lapic.c +++ b/sys/arch/amd64/amd64/lapic.c @@ -99,6 +99,7 @@ struct pic local_pic = { }; extern int x2apic_eoi; +extern int x2apic_eoi_swapgs; int x2apic_enabled = 0; u_int32_t x2apic_readreg(int reg); @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base) #endif x2apic_enabled = 1; codepatch_call(CPTAG_EOI, &x2apic_eoi); + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs); + else + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi); va = (vaddr_t)&local_apic; } else { @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base) pte = kvtopte(va); *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx; invlpg(va); + + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) + panic("xAPIC mode not implemented for SEV-ES"); } /* diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S index 9de0ee97f08..f78d65f66ae 100644 --- a/sys/arch/amd64/amd64/vector.S +++ b/sys/arch/amd64/amd64/vector.S @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi) lfence END(x2apic_eoi) +/* + * With SEV-ES the wrmsr instruction traps into the #VC handler which + * needs the kernel GS_BASE. So if we come from the userland, we need to + * do swapgs. 
The fast IPI handler does not perform swapgs, so we need + * to do it here. In order to detect whether we come from user or kernel + * land, this function MUST be called before %rsp is modified. + */ +KUENTRY(x2apic_eoi_swapgs) + testb $SEL_RPL,16(%rsp) + jz 1f + swapgs + FENCE_SWAPGS_MIS_TAKEN +1: + pushq %rax + pushq %rcx + pushq %rdx + mov $MSR_X2APIC_EOI,%ecx + mov $0,%eax + mov $0,%edx + wrmsr + popq %rdx + popq %rcx + popq %rax + + testb $SEL_RPL,16(%rsp) + jz 2f + swapgs + FENCE_SWAPGS_MIS_TAKEN +2: + retq + lfence +END(x2apic_eoi_swapgs) + #if NLAPIC > 0 #ifdef MULTIPROCESSOR KIDTVEC(recurse_lapic_ipi) @@ -630,9 +663,9 @@ END(Xresume_lapic_ipi) */ /* invalidate the entire TLB, no PCIDs version */ IDTVEC(ipi_invltlb) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax movq %cr3, %rax movq %rax, %cr3 @@ -652,11 +685,11 @@ END(Xipi_invltlb) #if NVMM > 0 /* Invalidate VMX EPT */ IDTVEC(ipi_invept) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx - ioapic_asm_ack() - movq $ept_shoot_vid, %rax movq ept_shoot_mode, %rdx invept (%rax), %rdx @@ -677,9 +710,9 @@ END(Xipi_invept) /* invalidate a single page, no PCIDs version */ IDTVEC(ipi_invlpg) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax movq tlb_shoot_addr1, %rax invlpg (%rax) @@ -698,11 +731,11 @@ END(Xipi_invlpg) /* invalidate a range of pages, no PCIDs version */ IDTVEC(ipi_invlrange) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx - ioapic_asm_ack() - movq tlb_shoot_addr1, %rax movq tlb_shoot_addr2, %rdx 1: invlpg (%rax) @@ -727,9 +760,9 @@ END(Xipi_invlrange) * Invalidate the userspace PCIDs. */ IDTVEC(ipi_invltlb_pcid) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax /* set the type */ movl $INVPCID_PCID,%eax @@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid) * while userspace VAs are present in PCIDs 1 and 2. 
*/ IDTVEC(ipi_invlpg_pcid) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax /* space for the INVPCID descriptor */ subq $16,%rsp @@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid) * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2. */ IDTVEC(ipi_invlrange_pcid) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx pushq %rcx - ioapic_asm_ack() - /* space for the INVPCID descriptor */ subq $16,%rsp diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h index 2ccb638a8e8..6b6bfee62e1 100644 --- a/sys/arch/amd64/include/codepatch.h +++ b/sys/arch/amd64/include/codepatch.h @@ -70,6 +70,7 @@ void codepatch_disable(void); #define CPTAG_RETPOLINE_R11 15 #define CPTAG_RETPOLINE_R13 16 #define CPTAG_IBPB_NOP 17 +#define CPTAG_EOI_FAST_IPI 18 /* * stac/clac SMAP instructions have lfence like semantics. Let's diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h index 99b22923499..2761d4f045f 100644 --- a/sys/arch/amd64/include/i82093reg.h +++ b/sys/arch/amd64/include/i82093reg.h @@ -114,7 +114,22 @@ #include <machine/codepatch.h> -#define ioapic_asm_ack(num) \ +/* + * This macro must also work if swapgs has not been called on entry + * from userland. To make this work, the macro must be called before %rsp + * is modified, see x2apic_eoi_swapgs. + */ +#define ioapic_asm_ack_fast_ipi(num) \ + CODEPATCH_START ;\ + movl $0,(local_apic+LAPIC_EOI)(%rip) ;\ + CODEPATCH_END(CPTAG_EOI_FAST_IPI) + + +/* + * This macro assumes that swapgs has already been called (e.g. by + * INTRENTRY). + */ +#define ioapic_asm_ack(num) \ CODEPATCH_START ;\ movl $0,(local_apic+LAPIC_EOI)(%rip) ;\ CODEPATCH_END(CPTAG_EOI)