Download raw body.
SEV: Modify IPI handling to work with the #VC trap handler
On Fri, Jan 16, 2026 at 10:56:48AM +0100, Stefan Fritsch wrote:
> Hi,
>
> this is the next commit on the way to make SEV-ES work with MP.
>
> With SEV-ES the wrmsr instruction used to access x2apic registers traps
> into the #VC trap handler. The trap handler will need the kernel GS_BASE
> once we move the ghcb address into a per-cpu var for MP. So, if we come
> from the userland, we need to do swapgs. The fast IPI handlers do not
> perform swapgs, so we add a special variant of the x2apic_eoi function
> that also does swapgs.
>
> Parts from Sebastian Sturm
>
> ok?
>
> Cheers,
> Stefan
>
I think we should wait and make sure the recent changes are solid. There
has been a fair amount of churn in this area (amd64 IPIs) recently and I don't
want to be piling on a bunch of other stuff too quickly.
See below for other comments.
-ml
> diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> index b19dcb349e3..b99e4ab80d7 100644
> --- a/sys/arch/amd64/amd64/lapic.c
> +++ b/sys/arch/amd64/amd64/lapic.c
> @@ -99,6 +99,7 @@ struct pic local_pic = {
> };
>
> extern int x2apic_eoi;
> +extern int x2apic_eoi_swapgs;
> int x2apic_enabled = 0;
>
> u_int32_t x2apic_readreg(int reg);
> @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
> #endif
> x2apic_enabled = 1;
> codepatch_call(CPTAG_EOI, &x2apic_eoi);
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
x2apic_eoi_swapgs should be renamed to indicate that it is for SEV, since the
current name alone doesn't convey that this variant is "for SEV only".
> + else
> + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
>
> va = (vaddr_t)&local_apic;
> } else {
> @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
> pte = kvtopte(va);
> *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
> invlpg(va);
> +
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + panic("xAPIC mode not implemented for SEV-ES");
Does this affect OpenBSD SEV-ES guests?
> }
>
> /*
> diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> index 9de0ee97f08..f78d65f66ae 100644
> --- a/sys/arch/amd64/amd64/vector.S
> +++ b/sys/arch/amd64/amd64/vector.S
> @@ -591,6 +591,39 @@ KUENTRY(x2apic_eoi)
> lfence
> END(x2apic_eoi)
>
> +/*
> + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> + * needs the kernel GS_BASE. So if we come from the userland, we need to
> + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> + * to do it here. In order to detect whether we come from user or kernel
> + * land, this function MUST be called before %rsp is modified.
> + */
> +KUENTRY(x2apic_eoi_swapgs)
> + testb $SEL_RPL,16(%rsp)
> + jz 1f
> + swapgs
> + FENCE_SWAPGS_MIS_TAKEN
> +1:
Please get guenther@'s ok here. There be dragons in this area (%gs
modifications, interrupt handler concerns, etc.). If you end up in places
with the wrong %gs, you're hooped.
> + pushq %rax
> + pushq %rcx
> + pushq %rdx
> + mov $MSR_X2APIC_EOI,%ecx
> + mov $0,%eax
> + mov $0,%edx
xorl %eax, %eax
xorl %edx, %edx
for consistency with other areas
> + wrmsr
> + popq %rdx
> + popq %rcx
> + popq %rax
> +
> + testb $SEL_RPL,16(%rsp)
> + jz 2f
> + swapgs
> + FENCE_SWAPGS_MIS_TAKEN
> +2:
> + retq
> + lfence
> +END(x2apic_eoi_swapgs)
> +
> #if NLAPIC > 0
> #ifdef MULTIPROCESSOR
> KIDTVEC(recurse_lapic_ipi)
> @@ -630,9 +663,9 @@ END(Xresume_lapic_ipi)
> */
> /* invalidate the entire TLB, no PCIDs version */
> IDTVEC(ipi_invltlb)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> movq %cr3, %rax
> movq %rax, %cr3
> @@ -652,11 +685,11 @@ END(Xipi_invltlb)
> #if NVMM > 0
> /* Invalidate VMX EPT */
> IDTVEC(ipi_invept)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
>
> - ioapic_asm_ack()
> -
> movq $ept_shoot_vid, %rax
> movq ept_shoot_mode, %rdx
> invept (%rax), %rdx
> @@ -677,9 +710,9 @@ END(Xipi_invept)
>
> /* invalidate a single page, no PCIDs version */
> IDTVEC(ipi_invlpg)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> movq tlb_shoot_addr1, %rax
> invlpg (%rax)
> @@ -698,11 +731,11 @@ END(Xipi_invlpg)
>
> /* invalidate a range of pages, no PCIDs version */
> IDTVEC(ipi_invlrange)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
>
> - ioapic_asm_ack()
> -
> movq tlb_shoot_addr1, %rax
> movq tlb_shoot_addr2, %rdx
> 1: invlpg (%rax)
> @@ -727,9 +760,9 @@ END(Xipi_invlrange)
> * Invalidate the userspace PCIDs.
> */
> IDTVEC(ipi_invltlb_pcid)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> /* set the type */
> movl $INVPCID_PCID,%eax
> @@ -766,9 +799,9 @@ END(Xipi_invltlb_pcid)
> * while userspace VAs are present in PCIDs 1 and 2.
> */
> IDTVEC(ipi_invlpg_pcid)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> /* space for the INVPCID descriptor */
> subq $16,%rsp
> @@ -815,12 +848,12 @@ END(Xipi_invlpg_pcid)
> * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
> */
> IDTVEC(ipi_invlrange_pcid)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
> pushq %rcx
>
> - ioapic_asm_ack()
> -
> /* space for the INVPCID descriptor */
> subq $16,%rsp
>
> diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
> index 2ccb638a8e8..6b6bfee62e1 100644
> --- a/sys/arch/amd64/include/codepatch.h
> +++ b/sys/arch/amd64/include/codepatch.h
> @@ -70,6 +70,7 @@ void codepatch_disable(void);
> #define CPTAG_RETPOLINE_R11 15
> #define CPTAG_RETPOLINE_R13 16
> #define CPTAG_IBPB_NOP 17
> +#define CPTAG_EOI_FAST_IPI 18
>
> /*
> * stac/clac SMAP instructions have lfence like semantics. Let's
> diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
> index 99b22923499..2761d4f045f 100644
> --- a/sys/arch/amd64/include/i82093reg.h
> +++ b/sys/arch/amd64/include/i82093reg.h
> @@ -114,7 +114,22 @@
>
> #include <machine/codepatch.h>
>
> -#define ioapic_asm_ack(num) \
> +/*
> + * This macro must also work if swapgs has not been called on entry
> + * from userland. To make this work, the macro must be called before %rsp
> + * is modified, see x2apic_eoi_swapgs.
> + */
> +#define ioapic_asm_ack_fast_ipi(num) \
> + CODEPATCH_START ;\
> + movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
> + CODEPATCH_END(CPTAG_EOI_FAST_IPI)
> +
> +
> +/*
> + * This macro assumes that swapgs has already been called (e.g. by
> + * INTRENTRY).
> + */
> +#define ioapic_asm_ack(num) \
> CODEPATCH_START ;\
> movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
> CODEPATCH_END(CPTAG_EOI)
>
SEV: Modify IPI handling to work with the #VC trap handler