From: Mike Larkin Subject: Re: SEV: Modify IPI handling to work with the #VC trap handler To: Stefan Fritsch Cc: tech@openbsd.org Date: Fri, 16 Jan 2026 19:21:02 -0800 On Fri, Jan 16, 2026 at 10:56:48AM +0100, Stefan Fritsch wrote: > Hi, > > this is the next commit on the way to make SEV-ES work with MP. > > With SEV-ES the wrmsr instruction used to access x2apic registers traps > into the #VC trap handler. The trap handler will need the kernel GS_BASE > once we move the ghcb address into a per-cpu var for MP. So, if we come > from the userland, we need to do swapgs. The fast IPI handlers do not > perform swapgs, so we add a special variant of the x2apic_eoi function > that also does swapgs. > > Parts from Sebastian Sturm > > ok? > > Cheers, > Stefan > I think we should wait and make sure the recent changes are solid. There has been a fair amount of churn in this area (amd64 IPIs) recently and I don't want to be piling on a bunch of other stuff too quickly. See below for other comments. -ml > diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c > index b19dcb349e3..b99e4ab80d7 100644 > --- a/sys/arch/amd64/amd64/lapic.c > +++ b/sys/arch/amd64/amd64/lapic.c > @@ -99,6 +99,7 @@ struct pic local_pic = { > }; > > extern int x2apic_eoi; > +extern int x2apic_eoi_swapgs; > int x2apic_enabled = 0; > > u_int32_t x2apic_readreg(int reg); > @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base) > #endif > x2apic_enabled = 1; > codepatch_call(CPTAG_EOI, &x2apic_eoi); > + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) > + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs); x2apic_eoi_swapgs should be named something "for SEV" since just looking at that name doesn't convey that this is "for SEV only". 
> + else > + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi); > > va = (vaddr_t)&local_apic; > } else { > @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base) > pte = kvtopte(va); > *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx; > invlpg(va); > + > + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) > + panic("xAPIC mode not implemented for SEV-ES"); Does this affect OpenBSD SEV-ES guests? > } > > /* > diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S > index 9de0ee97f08..f78d65f66ae 100644 > --- a/sys/arch/amd64/amd64/vector.S > +++ b/sys/arch/amd64/amd64/vector.S > @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi) > lfence > END(x2apic_eoi) > > +/* > + * With SEV-ES the wrmsr instruction traps into the #VC handler which > + * needs the kernel GS_BASE. So if we come from the userland, we need to > + * do swapgs. The fast IPI handler does not perform swapgs, so we need > + * to do it here. In order to detect whether we come from user or kernel > + * land, this function MUST be called before %rsp is modified. > + */ > +KUENTRY(x2apic_eoi_swapgs) > + testb $SEL_RPL,16(%rsp) > + jz 1f > + swapgs > + FENCE_SWAPGS_MIS_TAKEN > +1: Please get guenther@'s ok here. There be dragons in this area (%gs modifications, interrupt handler concerns, etc). If you end up in places with the wrong %gs, you're hooped. 
> + pushq %rax > + pushq %rcx > + pushq %rdx > + mov $MSR_X2APIC_EOI,%ecx > + mov $0,%eax > + mov $0,%edx Use "xorl %eax, %eax" and "xorl %edx, %edx" here, for consistency with other areas. > + wrmsr > + popq %rdx > + popq %rcx > + popq %rax > + > + testb $SEL_RPL,16(%rsp) > + jz 2f > + swapgs > + FENCE_SWAPGS_MIS_TAKEN > +2: > + retq > + lfence > +END(x2apic_eoi_swapgs) > + > #if NLAPIC > 0 > #ifdef MULTIPROCESSOR > KIDTVEC(recurse_lapic_ipi) > @@ -630,9 +663,9 @@ END(Xresume_lapic_ipi) > */ > /* invalidate the entire TLB, no PCIDs version */ > IDTVEC(ipi_invltlb) > - pushq %rax > + ioapic_asm_ack_fast_ipi() > > - ioapic_asm_ack() > + pushq %rax > > movq %cr3, %rax > movq %rax, %cr3 > @@ -652,11 +685,11 @@ END(Xipi_invltlb) > #if NVMM > 0 > /* Invalidate VMX EPT */ > IDTVEC(ipi_invept) > + ioapic_asm_ack_fast_ipi() > + > pushq %rax > pushq %rdx > > - ioapic_asm_ack() > - > movq $ept_shoot_vid, %rax > movq ept_shoot_mode, %rdx > invept (%rax), %rdx > @@ -677,9 +710,9 @@ END(Xipi_invept) > > /* invalidate a single page, no PCIDs version */ > IDTVEC(ipi_invlpg) > - pushq %rax > + ioapic_asm_ack_fast_ipi() > > - ioapic_asm_ack() > + pushq %rax > > movq tlb_shoot_addr1, %rax > invlpg (%rax) > @@ -698,11 +731,11 @@ END(Xipi_invlpg) > > /* invalidate a range of pages, no PCIDs version */ > IDTVEC(ipi_invlrange) > + ioapic_asm_ack_fast_ipi() > + > pushq %rax > pushq %rdx > > - ioapic_asm_ack() > - > movq tlb_shoot_addr1, %rax > movq tlb_shoot_addr2, %rdx > 1: invlpg (%rax) > @@ -727,9 +760,9 @@ END(Xipi_invlrange) > * Invalidate the userspace PCIDs. > */ > IDTVEC(ipi_invltlb_pcid) > - pushq %rax > + ioapic_asm_ack_fast_ipi() > > - ioapic_asm_ack() > + pushq %rax > > /* set the type */ > movl $INVPCID_PCID,%eax > @@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid) > * while userspace VAs are present in PCIDs 1 and 2. 
> */ > IDTVEC(ipi_invlpg_pcid) > - pushq %rax > + ioapic_asm_ack_fast_ipi() > > - ioapic_asm_ack() > + pushq %rax > > /* space for the INVPCID descriptor */ > subq $16,%rsp > @@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid) > * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2. > */ > IDTVEC(ipi_invlrange_pcid) > + ioapic_asm_ack_fast_ipi() > + > pushq %rax > pushq %rdx > pushq %rcx > > - ioapic_asm_ack() > - > /* space for the INVPCID descriptor */ > subq $16,%rsp > > diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h > index 2ccb638a8e8..6b6bfee62e1 100644 > --- a/sys/arch/amd64/include/codepatch.h > +++ b/sys/arch/amd64/include/codepatch.h > @@ -70,6 +70,7 @@ void codepatch_disable(void); > #define CPTAG_RETPOLINE_R11 15 > #define CPTAG_RETPOLINE_R13 16 > #define CPTAG_IBPB_NOP 17 > +#define CPTAG_EOI_FAST_IPI 18 > > /* > * stac/clac SMAP instructions have lfence like semantics. Let's > diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h > index 99b22923499..2761d4f045f 100644 > --- a/sys/arch/amd64/include/i82093reg.h > +++ b/sys/arch/amd64/include/i82093reg.h > @@ -114,7 +114,22 @@ > > #include <machine/codepatch.h> > > -#define ioapic_asm_ack(num) \ > +/* > + * This macro must also work if swapgs has not been called on entry > + * from userland. To make this work, the macro must be called before %rsp > + * is modified, see x2apic_eoi_swapgs. > + */ > +#define ioapic_asm_ack_fast_ipi(num) \ > + CODEPATCH_START ;\ > + movl $0,(local_apic+LAPIC_EOI)(%rip) ;\ > + CODEPATCH_END(CPTAG_EOI_FAST_IPI) > + > + > +/* > + * This macro assumes that swapgs has already been called (e.g. by > + * INTRENTRY). > + */ > +#define ioapic_asm_ack(num) \ > CODEPATCH_START ;\ > movl $0,(local_apic+LAPIC_EOI)(%rip) ;\ > CODEPATCH_END(CPTAG_EOI) >