Download raw body.
SEV: Modifiy IPI handling to work with the #VC trap handler
added guenther@ to cc because of the swapgs part in vector.S
On Fri, 16 Jan 2026, Mike Larkin wrote:
> On Fri, Jan 16, 2026 at 10:56:48AM +0100, Stefan Fritsch wrote:
> > Hi,
> >
> > this is the next commit on the way to make SEV-ES work with MP.
> >
> > With SEV-ES the wrmsr instruction used to access x2apic registers traps
> > into the #VC trap handler. The trap handler will need the kernel GS_BASE
> > once we move the ghcb address into a per-cpu var for MP. So, if we come
> > from the userland, we need to do swapgs. The fast IPI handlers do not
> > perform swapgs, so we add a special variant of the x2apic_eoi function
> > that also does swapgs.
> >
> > Parts from Sebastian Sturm
> >
> > ok?
>
> I think we should wait and make sure the recent changes are solid. There
> has been a fair amount of churn in this area (amd64 IPIs) recently and I don't
> want to be piling on a bunch of other stuff too quickly.
Sure, no problem. Maybe things will have settled down enough by the time
of the hackathon. New diff with your comments fixed at the bottom of the
mail.
>
> Other comments see below.
>
> -ml
>
> > diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> > index b19dcb349e3..b99e4ab80d7 100644
> > --- a/sys/arch/amd64/amd64/lapic.c
> > +++ b/sys/arch/amd64/amd64/lapic.c
> > @@ -99,6 +99,7 @@ struct pic local_pic = {
> > };
> >
> > extern int x2apic_eoi;
> > +extern int x2apic_eoi_swapgs;
> > int x2apic_enabled = 0;
> >
> > u_int32_t x2apic_readreg(int reg);
> > @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
> > #endif
> > x2apic_enabled = 1;
> > codepatch_call(CPTAG_EOI, &x2apic_eoi);
> > + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> > + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
>
> x2apic_eoi_swapgs should be named something "for SEV" since just looking at that
> name doesn't convey that this is "for SEV only".
I have renamed it to x2apic_eoi_sev_es
>
> > + else
> > + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
> >
> > va = (vaddr_t)&local_apic;
> > } else {
> > @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
> > pte = kvtopte(va);
> > *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
> > invlpg(va);
> > +
> > + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> > + panic("xAPIC mode not implemented for SEV-ES");
>
> does this affect openbsd SEV-ES guests?
This already does not work, as we don't support MMIO in the #VC trap
handler and xapic mode uses direct memory access without busdma API. This
just adds a better error message. To make xapic work, we would need to
change lapic.c to use the busdma API, like kettenis did for the ioapic.
But at the moment, I don't think this is worth the effort. All hypervisors
we currently support for SEV-ES do support x2apic.
>
> > }
> >
> > /*
> > diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> > index 9de0ee97f08..f78d65f66ae 100644
> > --- a/sys/arch/amd64/amd64/vector.S
> > +++ b/sys/arch/amd64/amd64/vector.S
> > @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
> > lfence
> > END(x2apic_eoi)
> >
> > +/*
> > + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> > + * needs the kernel GS_BASE. So if we come from the userland, we need to
> > + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> > + * to do it here. In order to detect whether we come from user or kernel
> > + * land, this function MUST be called before %rsp is modified.
> > + */
> > +KUENTRY(x2apic_eoi_swapgs)
> > + testb $SEL_RPL,16(%rsp)
> > + jz 1f
> > + swapgs
> > + FENCE_SWAPGS_MIS_TAKEN
> > +1:
>
> please get guenther@'s ok here. There be dragons in this area (%gs
> modifications, interrupt handler concerns, etc). If you end up in places
> with wrong %gs you're hooped.
>
> > + pushq %rax
> > + pushq %rcx
> > + pushq %rdx
> > + mov $MSR_X2APIC_EOI,%ecx
> > + mov $0,%eax
> > + mov $0,%edx
>
> xorl %eax, %eax
> xorl %edx, %edx
>
> for consistency with other areas
fixed
>
> > + wrmsr
> > + popq %rdx
> > + popq %rcx
> > + popq %rax
> > +
> > + testb $SEL_RPL,16(%rsp)
> > + jz 2f
> > + swapgs
> > + FENCE_SWAPGS_MIS_TAKEN
> > +2:
> > + retq
> > + lfence
> > +END(x2apic_eoi_swapgs)
> > +
...
Cheers,
Stefan
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index b19dcb349e3..00b4a1ed288 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
};
extern int x2apic_eoi;
+extern int x2apic_eoi_sev_es;
int x2apic_enabled = 0;
u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
#endif
x2apic_enabled = 1;
codepatch_call(CPTAG_EOI, &x2apic_eoi);
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_sev_es);
+ else
+ codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
va = (vaddr_t)&local_apic;
} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
pte = kvtopte(va);
*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
invlpg(va);
+
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ panic("xAPIC mode not implemented for SEV-ES");
}
/*
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 9de0ee97f08..a81b840b34c 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
lfence
END(x2apic_eoi)
+/*
+ * With SEV-ES the wrmsr instruction traps into the #VC handler which
+ * needs the kernel GS_BASE. So if we come from the userland, we need to
+ * do swapgs. The fast IPI handler does not perform swapgs, so we need
+ * to do it here. In order to detect whether we come from user or kernel
+ * land, this function MUST be called before %rsp is modified.
+ */
+KUENTRY(x2apic_eoi_sev_es)
+ testb $SEL_RPL,16(%rsp)
+ jz 1f
+ swapgs
+ FENCE_SWAPGS_MIS_TAKEN
+1:
+ pushq %rax
+ pushq %rcx
+ pushq %rdx
+ mov $MSR_X2APIC_EOI,%ecx
+ xorl %eax,%eax
+ xorl %edx,%edx
+ wrmsr
+ popq %rdx
+ popq %rcx
+ popq %rax
+
+ testb $SEL_RPL,16(%rsp)
+ jz 2f
+ swapgs
+ FENCE_SWAPGS_MIS_TAKEN
+2:
+ retq
+ lfence
+END(x2apic_eoi_sev_es)
+
#if NLAPIC > 0
#ifdef MULTIPROCESSOR
KIDTVEC(recurse_lapic_ipi)
@@ -630,9 +663,9 @@ END(Xresume_lapic_ipi)
*/
/* invalidate the entire TLB, no PCIDs version */
IDTVEC(ipi_invltlb)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
movq %cr3, %rax
movq %rax, %cr3
@@ -652,11 +685,11 @@ END(Xipi_invltlb)
#if NVMM > 0
/* Invalidate VMX EPT */
IDTVEC(ipi_invept)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
- ioapic_asm_ack()
-
movq $ept_shoot_vid, %rax
movq ept_shoot_mode, %rdx
invept (%rax), %rdx
@@ -677,9 +710,9 @@ END(Xipi_invept)
/* invalidate a single page, no PCIDs version */
IDTVEC(ipi_invlpg)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
movq tlb_shoot_addr1, %rax
invlpg (%rax)
@@ -698,11 +731,11 @@ END(Xipi_invlpg)
/* invalidate a range of pages, no PCIDs version */
IDTVEC(ipi_invlrange)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
- ioapic_asm_ack()
-
movq tlb_shoot_addr1, %rax
movq tlb_shoot_addr2, %rdx
1: invlpg (%rax)
@@ -727,9 +760,9 @@ END(Xipi_invlrange)
* Invalidate the userspace PCIDs.
*/
IDTVEC(ipi_invltlb_pcid)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
/* set the type */
movl $INVPCID_PCID,%eax
@@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid)
* while userspace VAs are present in PCIDs 1 and 2.
*/
IDTVEC(ipi_invlpg_pcid)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
/* space for the INVPCID descriptor */
subq $16,%rsp
@@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid)
* PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
*/
IDTVEC(ipi_invlrange_pcid)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
pushq %rcx
- ioapic_asm_ack()
-
/* space for the INVPCID descriptor */
subq $16,%rsp
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 2ccb638a8e8..6b6bfee62e1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -70,6 +70,7 @@ void codepatch_disable(void);
#define CPTAG_RETPOLINE_R11 15
#define CPTAG_RETPOLINE_R13 16
#define CPTAG_IBPB_NOP 17
+#define CPTAG_EOI_FAST_IPI 18
/*
* stac/clac SMAP instructions have lfence like semantics. Let's
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..3288176fb22 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,21 @@
#include <machine/codepatch.h>
-#define ioapic_asm_ack(num) \
+/*
+ * This macro must also work if swapgs has not been called on entry
+ * from user land.
+ */
+#define ioapic_asm_ack_fast_ipi(num) \
+ CODEPATCH_START ;\
+ movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
+ CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * This macro assumes that swapgs has already been called (e.g. by
+ * INTRENTRY).
+ */
+#define ioapic_asm_ack(num) \
CODEPATCH_START ;\
movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
CODEPATCH_END(CPTAG_EOI)
SEV: Modifiy IPI handling to work with the #VC trap handler