SEV-ES multiprocessor support
> Date: Thu, 18 Sep 2025 13:17:32 +0200 (CEST)
> From: Stefan Fritsch <sf@sfritsch.de>
>
> Hi,
>
> One remaining problem with SEV-ES is that we don't have multiprocessor support
> yet, and booting openbsd in a SEV-ES VM with several VCPUs hangs at
>
> cpu3 at mainbus0: apid 3 (application processor)
> cpu3: failed to become ready
> cpu3: failed to identify
>
> Sometimes it continues after some time, but often it does not. I am not
> sure if the problem is on our side or if there is error handling missing
> in qemu/KVM. Even when it does not hang, some things do not work correctly:
> sysctl hw.ncpu reports the wrong value, top prints warnings, and so on.
>
> In any case, I think this should be fixed somehow before the release, in
> order to avoid support requests on the lists. There are two ways forward:
>
> 1) try to get SEV-ES MP support finished before the release.
>
> 2) commit some workaround that prevents openbsd from trying to use the
> application processors if SEV-ES is enabled, likely in cpu_match().
>
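Side note on 2): a minimal sketch of such a guard, assuming the attach
args' cpu_role is visible in cpu_match() (placement and exact form are
my guess, untested, not part of the diff below):

	/* in cpu_match(): never match application processors while
	 * SEV-ES is enabled, so autoconf leaves the APs alone */
	struct cpu_attach_args *caa = (struct cpu_attach_args *)aux;

	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) &&
	    caa->cpu_role == CPU_ROLE_AP)
		return (0);
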
> The diff that implements MP support is attached below. With this diff,
> openbsd works for me in a 4-VCPU VM with SEV-ES enabled.
>
> There is also the question if we actually need MP support for SEV-ES.
> SEV-ES is just an intermediate step and in the end, most people will want
> to use SEV-SNP (supported in Zen 3 Epyc CPUs and later). MP CPU bringup is
> again a bit different with SEV-SNP compared to SEV-ES, though the larger
> part of the diff is needed for both variants. In my opinion, skipping MP
> support for SEV-ES and only implementing it for SEV-SNP later is also an
> option.
>
> I doubt there is enough time for 1). But I could start splitting the diff
> into reviewable parts and we will see how far we get.
>
> What do you think?
I think no. If it isn't finished, it isn't finished. There will be
time to do things properly after the tree unlocks.
> diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
> index 2611859f3f5..247f7b8cff1 100644
> --- a/sys/arch/amd64/amd64/cpu.c
> +++ b/sys/arch/amd64/amd64/cpu.c
> @@ -95,6 +95,7 @@
> #include <machine/gdt.h>
> #include <machine/pio.h>
> #include <machine/vmmvar.h>
> +#include <machine/ghcb.h>
>
> #if NLAPIC > 0
> #include <machine/i82489reg.h>
> @@ -438,6 +439,10 @@ int mp_cpu_start(struct cpu_info *);
> void mp_cpu_start_cleanup(struct cpu_info *);
> struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
> mp_cpu_start_cleanup };
> +int mp_sev_es_cpu_start(struct cpu_info *);
> +void mp_sev_es_cpu_start_cleanup(struct cpu_info *);
> +struct cpu_functions mp_sev_es_cpu_funcs = { mp_sev_es_cpu_start, NULL,
> + mp_sev_es_cpu_start_cleanup };
> #endif /* MULTIPROCESSOR */
>
> const struct cfattach cpu_ca = {
> @@ -606,6 +611,27 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
> ci->ci_tlog_base = malloc(sizeof(struct tlog),
> M_DEVBUF, M_WAITOK);
> #endif
> +
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
> + struct ghcb_sa *ghcb_va = NULL;
> + struct vm_page *ghcb_page;
> +
> + ghcb_page = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
> + if (ghcb_page == NULL)
> + panic("failed to allocate GHCB page");
> +
> + ghcb_va = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
> + if (ghcb_va == NULL)
> + panic("failed to allocate virtual GHCB address");
> +
> + pmap_kenter_pa((vaddr_t)ghcb_va, ghcb_page->phys_addr | PMAP_NOCRYPT,
> + PROT_READ | PROT_WRITE);
> +
> + ci->ci_ghcb_paddr = ghcb_page->phys_addr;
> + ci->ci_ghcb = ghcb_va;
> +
> + memset(ghcb_va, 0, PAGE_SIZE);
> + }
> } else {
> ci = &cpu_info_primary;
> #if defined(MULTIPROCESSOR)
> @@ -1031,6 +1057,24 @@ cpu_hatch(void *v)
> struct cpu_info *ci = (struct cpu_info *)v;
> int s;
>
> + /* We need the GSBASE MSR for the vctrap handler to work.
> + * CPUID will trap into the #VC trap handler on AMD SEV-ES.
> + */
> + cpu_init_msrs(ci);
> +
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
> + extern int x2apic_enabled;
> +
> + /* Load IDT early for #VC handler */
> + cpu_init_idt();
> + if (x2apic_enabled) {
> + /* Now that we have a #VC handler, we are able
> + * to enable x2APIC.
> + */
> + wrmsr(MSR_APICBASE, rdmsr(MSR_APICBASE) | APICBASE_ENABLE_X2APIC);
> + }
> + }
> +
> {
> uint32_t vendor[4];
> int level;
> @@ -1040,7 +1084,6 @@ cpu_hatch(void *v)
> cpu_set_vendor(ci, level, (const char *)vendor);
> }
>
> - cpu_init_msrs(ci);
>
> #ifdef DEBUG
> if (ci->ci_flags & CPUF_PRESENT)
> @@ -1205,6 +1248,60 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
> outb(IO_RTC, NVRAM_RESET);
> outb(IO_RTC+1, NVRAM_RESET_RST);
> }
> +
> +paddr_t sev_es_jmp_tbl_addr;
> +
> +int mp_sev_es_cpu_start(struct cpu_info *ci)
> +{
> + struct {
> + uint16_t reset_ip;
> + uint16_t reset_cs;
> + } *jmp_tbl;
> +
> + if (sev_es_jmp_tbl_addr == 0) {
> + paddr_t jmp_tbl_paddr;
> +
> + if (!ghcb_get_ap_jump_table(&jmp_tbl_paddr))
> + sev_es_jmp_tbl_addr = jmp_tbl_paddr & ~PAGE_MASK;
> + else
> + panic("failed to get AP jump table address");
> +
> + /* Update the AP jump table only once */
> + jmp_tbl = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
> + if (jmp_tbl == NULL)
> + panic("failed to allocate virtual address");
> +
> + pmap_kenter_pa((vaddr_t)jmp_tbl, sev_es_jmp_tbl_addr,
> + PROT_READ | PROT_WRITE);
> +
> + jmp_tbl->reset_ip = 0;
> + jmp_tbl->reset_cs = MP_TRAMPOLINE >> 4;
> +
> + pmap_kremove((vaddr_t)jmp_tbl, PAGE_SIZE);
> + km_free(jmp_tbl, PAGE_SIZE, &kv_any, &kp_none);
> + }
> +
> + if (ci->ci_flags & CPUF_AP) {
> + x86_ipi_init(ci->ci_apicid);
> +
> + delay(10000);
> +
> + if (cpu_feature & CPUID_APIC) {
> + x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
> + delay(200);
> +
> + x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
> + delay(200);
> + }
> + }
> +
> + return 0;
> +}
> +
> +void mp_sev_es_cpu_start_cleanup(struct cpu_info *ci)
> +{
> + (void)ci;
> +}
> #endif /* MULTIPROCESSOR */
>
> typedef void (vector)(void);
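For reviewers unfamiliar with the protocol: under SEV-ES the hypervisor
cannot load the AP's CS:IP from the SIPI vector, because the register
state lives in the encrypted VMSA. Instead, as I understand the GHCB
spec, the guest publishes a real-mode entry point through the AP jump
table and the firmware routes the APs there. The 4-byte table that
mp_sev_es_cpu_start() fills in, written out as a named struct (the
name is mine):

	struct sev_es_jmp_tbl {
		uint16_t reset_ip;	/* real-mode IP */
		uint16_t reset_cs;	/* real-mode CS */
	};

	/* reset_cs:reset_ip = (MP_TRAMPOLINE >> 4):0 decodes to
	 * physical address CS * 16 + IP = MP_TRAMPOLINE, so the AP
	 * enters the regular MP trampoline. */
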
> diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
> index 2b0fa809570..aace7f28303 100644
> --- a/sys/arch/amd64/amd64/ghcb.c
> +++ b/sys/arch/amd64/amd64/ghcb.c
> @@ -47,9 +47,6 @@ const uint64_t ghcb_sz_clear_masks[] = {
> 0xffffffffffffffffULL, 0xffffffffffffffffULL
> };
>
> -vaddr_t ghcb_vaddr;
> -paddr_t ghcb_paddr;
> -
> /*
> * ghcb_clear
> *
> @@ -254,6 +251,11 @@ ghcb_sync_in(struct trapframe *frame, struct ghcb_extra_regs *regs,
> frame->tf_rdx |= (ghcb->v_rdx & ghcb_sz_masks[gsin->sz_d]);
> }
>
> +	if (regs && ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO1))
> +		regs->exitinfo1 = ghcb->v_sw_exitinfo1;
> +	if (regs && ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO2))
> +		regs->exitinfo2 = ghcb->v_sw_exitinfo2;
> +
> if (regs && regs->data) {
> data_sz = regs->data_sz;
> KASSERT(data_sz <= sizeof(ghcb->v_sharedbuf));
> @@ -303,14 +305,14 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
> ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_READ;
> ghcb_regs.exitinfo1 = paddr;
> ghcb_regs.exitinfo2 = size;
> - ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
> - v_sharedbuf);
> + ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
> + offsetof(struct ghcb_sa, v_sharedbuf);
> } else {
> ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_WRITE;
> ghcb_regs.exitinfo1 = paddr;
> ghcb_regs.exitinfo2 = size;
> - ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
> - v_sharedbuf);
> + ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
> + offsetof(struct ghcb_sa, v_sharedbuf);
> ghcb_regs.data = val;
> ghcb_regs.data_sz = size;
> }
> @@ -322,10 +324,10 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
>
> s = intr_disable();
>
> - ghcb = (struct ghcb_sa *)ghcb_vaddr;
> - ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
> + wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
> - wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> + ghcb = curcpu()->ci_ghcb;
> + ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
>
> vmgexit();
>
> @@ -399,10 +401,10 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
>
> s = intr_disable();
>
> - ghcb = (struct ghcb_sa *)ghcb_vaddr;
> + ghcb = curcpu()->ci_ghcb;
> ghcb_sync_out(&frame, &ghcb_regs, ghcb, &syncout);
>
> - wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> + wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
> vmgexit();
>
> @@ -418,3 +420,55 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
> if (read)
> *val = frame.tf_rax;
> }
> +
> +#ifdef MULTIPROCESSOR
> +int
> +ghcb_get_ap_jump_table(paddr_t *jmp_tbl_addr)
> +{
> + struct ghcb_sa *ghcb;
> + struct ghcb_sync syncout, syncin;
> + struct ghcb_extra_regs ghcb_regs;
> + unsigned long s;
> +
> + memset(&syncout, 0, sizeof(syncout));
> + memset(&syncin, 0, sizeof(syncin));
> + memset(&ghcb_regs, 0, sizeof(ghcb_regs));
> +
> + ghcb_regs.exitcode = SEV_VMGEXIT_AP_JUMP_TABLE;
> + ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
> +	ghcb_regs.exitinfo1 = 1;	/* 1 = get, 0 = set */
> + ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
> + ghcb_regs.exitinfo2 = 0;
> + ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
> +
> + ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncin);
> + ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncin);
> +
> + s = intr_disable();
> +
> + wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
> +
> + ghcb = curcpu()->ci_ghcb;
> + ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
> +
> + vmgexit();
> +
> + if (ghcb_verify_bm(ghcb->valid_bitmap, syncin.valid_bitmap)) {
> + ghcb_clear(ghcb);
> + panic("invalid hypervisor response");
> + }
> +
> + memset(&ghcb_regs, 0, sizeof(ghcb_regs));
> +
> + ghcb_sync_in(NULL, &ghcb_regs, ghcb, &syncin);
> +
> + intr_restore(s);
> +
> + if (ghcb_regs.exitinfo1 == 0) {
> + *jmp_tbl_addr = ghcb_regs.exitinfo2;
> + return 0;
> + } else {
> + return 1;
> + }
> +}
> +#endif
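The AP jump table VMGEXIT (exit code 0x80000005) selects its operation
via SW_EXITINFO1: 1 means "get", 0 means "set", and the table address
travels in SW_EXITINFO2. A hypothetical "set" variant of the function
above would differ only in these two assignments:

	ghcb_regs.exitinfo1 = 0;		/* 0 = set */
	ghcb_regs.exitinfo2 = jmp_tbl_paddr;	/* table address */
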
> diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> index f7fdb81ccca..80436294e6f 100644
> --- a/sys/arch/amd64/amd64/lapic.c
> +++ b/sys/arch/amd64/amd64/lapic.c
> @@ -99,6 +99,7 @@ struct pic local_pic = {
> };
>
> extern int x2apic_eoi;
> +extern int x2apic_eoi_swapgs;
> int x2apic_enabled = 0;
>
> u_int32_t x2apic_readreg(int reg);
> @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
> #endif
> x2apic_enabled = 1;
> codepatch_call(CPTAG_EOI, &x2apic_eoi);
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
> + else
> + codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
>
> va = (vaddr_t)&local_apic;
> } else {
> @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
> pte = kvtopte(va);
> *pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
> invlpg(va);
> +
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + panic("xAPIC mode not implemented for SEV-ES");
> }
>
> /*
> diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
> index 4533b19df2f..951da60b1d2 100644
> --- a/sys/arch/amd64/amd64/locore0.S
> +++ b/sys/arch/amd64/amd64/locore0.S
> @@ -804,15 +804,6 @@ longmode_hi:
> addq %rsi,%rdx
> movq %rdx,atdevbase(%rip)
>
> - /* Relocate GHCB. */
> - movq cpu_sev_guestmode(%rip),%rax
> - testq $SEV_STAT_ES_ENABLED,%rax
> - jz .Lnoghcbreloc
> - movq $(PROC0_GHCB_OFF+KERNBASE),%rdx
> - addq %rsi,%rdx
> - movq %rdx,ghcb_vaddr(%rip)
> -
> -.Lnoghcbreloc:
> /* Record start of symbols */
> movq $__kernel_bss_end, ssym(%rip)
>
> diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
> index 3de32b26354..d86ec85db67 100644
> --- a/sys/arch/amd64/amd64/machdep.c
> +++ b/sys/arch/amd64/amd64/machdep.c
> @@ -1342,9 +1342,10 @@ cpu_init_early_vctrap(paddr_t addr)
> cpu_init_idt();
>
> /* Tell vmm(4) about our GHCB. */
> - ghcb_paddr = addr;
> - memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
> - wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> + cpu_info_primary.ci_ghcb_paddr = addr;
> + cpu_info_primary.ci_ghcb = (struct ghcb_sa *)(addr + KERNBASE);
> + memset(cpu_info_primary.ci_ghcb, 0, 2 * PAGE_SIZE);
> + wrmsr(MSR_SEV_GHCB, cpu_info_primary.ci_ghcb_paddr);
> }
>
> void
> @@ -1388,6 +1389,7 @@ map_tramps(void)
> extern u_char mp_tramp_data_start[];
> extern u_char mp_tramp_data_end[];
> extern u_int32_t mp_pdirpa;
> + extern u_int32_t mp_sev_guestmode;
> #endif
>
> /*
> @@ -1429,6 +1431,13 @@ map_tramps(void)
> */
> mp_pdirpa = tramp_pdirpa;
>
> + /*
> + * We need to introduce and set mp_sev_guestmode since the
> +	 * global cpu_sev_guestmode variable may not be accessible in
> + * 16 or 32 bit mode.
> + */
> + mp_sev_guestmode = cpu_sev_guestmode;
> +
> /* Unmap, will be remapped in cpu_start_secondary */
> pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
> pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
> diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S
> index 96247c8e890..838168843bf 100644
> --- a/sys/arch/amd64/amd64/mptramp.S
> +++ b/sys/arch/amd64/amd64/mptramp.S
> @@ -143,6 +143,14 @@ _TRMP_LABEL(.Lmp_startup)
> rdmsr
> movl %edx, %edi # %edx is needed by wrmsr below
>
> + # If SEV is enabled, we can assume that NXE is supported and we cannot
> +	# do cpuid yet.
> + movl $mp_sev_guestmode, %edx
> + movl (%edx), %edx
> + xorl %eax, %eax
> + testl %edx, %edx
> + jnz 4f
> +
> # Check if we need to enable NXE
> movl $0x80000001, %eax
> cpuid
> @@ -150,6 +158,7 @@ _TRMP_LABEL(.Lmp_startup)
> xorl %eax,%eax
> testl %edx, %edx
> jz 1f
> +4:
> orl $EFER_NXE, %eax
> 1:
> orl $(EFER_LME|EFER_SCE), %eax
> @@ -192,6 +201,31 @@ END(cpu_spinup_trampoline)
>
> .text
> GENTRY(cpu_spinup_finish)
> + movl $mp_sev_guestmode, %eax
> + movl (%eax), %eax
> + testl $SEV_STAT_ES_ENABLED, %eax
> + jz 5f
> +
> + # We are in SEV-ES mode. MSR or MMIO access is only possible
> + # through a GHCB. Query APIC ID via CPUID leaf 1 EBX
> + movl $1, %edx
> +	# register index 1 = EBX, request code 4 = cpuid
> + movl $(1 << 30 | 4), %eax
> + movl $MSR_SEV_GHCB, %ecx
> + wrmsr
> + rep vmmcall
> + rdmsr
> + # Make sure the query was successful
> + cmpl $(1 << 30 | 5), %eax
> + jne .Lsev_es_terminate
> +
> + movl %edx, %eax
> + shrl $24, %eax
> + # Skip x2apic initialization if running on SEV-ES or higher.
> + # We cannot do rdmsr/wrmsr without a GHCB. Will be done later in cpu_hatch.
> + jmp 2f
> +
> +5:
> movl x2apic_enabled,%eax
> testl %eax,%eax
> jz 1f
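The magic numbers in the CPUID sequence above come from the GHCB MSR
protocol: an AP that has no GHCB page yet writes a request into
MSR_SEV_GHCB and issues VMGEXIT (rep vmmcall). Decoded as C, per my
reading of the AMD GHCB spec (macro and helper names are mine):

	#define GHCB_MSR_CPUID_REQ	0x004ULL	/* request code */
	#define GHCB_MSR_CPUID_RESP	0x005ULL	/* response code */
	#define GHCB_MSR_TERMINATE	0x100ULL	/* terminate guest */

	static inline uint64_t
	ghcb_msr_cpuid_req(uint64_t leaf, uint64_t reg)	/* reg: 0=EAX..3=EDX */
	{
		return (leaf << 32) | (reg << 30) | GHCB_MSR_CPUID_REQ;
	}

	/* ghcb_msr_cpuid_req(1, 1) yields %edx:%eax = 1:(1 << 30 | 4),
	 * exactly what the trampoline loads; the response carries
	 * (1 << 30 | 5) in the low half and EBX in the high half, with
	 * the initial APIC ID in EBX bits 31:24 (hence the shrl $24).
	 * Writing 0x100 requests termination, which is what
	 * .Lsev_es_terminate below does. */
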
> @@ -234,9 +268,18 @@ GENTRY(cpu_spinup_finish)
> movq %rax,%cr0
> call cpu_hatch
> movq $0,-8(%rsp)
> -END(cpu_spinup_finish)
> /* NOTREACHED */
>
> +.Lsev_es_terminate:
> + xorl %edx, %edx
> + movl $0x100, %eax
> + movl $MSR_SEV_GHCB, %ecx
> + wrmsr
> + rep vmmcall
> + hlt
> + jmp .Lsev_es_terminate
> +END(cpu_spinup_finish)
> +
> .section .rodata
> .type mp_tramp_data_start,@object
> mp_tramp_data_start:
> @@ -250,6 +293,12 @@ _TRMP_DATA_LABEL(mp_pdirpa)
> .long 0
> .size mp_pdirpa,4
>
> + .global mp_sev_guestmode
> + .type mp_sev_guestmode,@object
> +_TRMP_DATA_LABEL(mp_sev_guestmode)
> + .long 0
> + .size mp_sev_guestmode,4
> +
>
> _TRMP_DATA_LABEL(.Lmptramp_gdt32)
> .quad 0x0000000000000000
> diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
> index 396366de89b..49c7c0ffb70 100644
> --- a/sys/arch/amd64/amd64/trap.c
> +++ b/sys/arch/amd64/amd64/trap.c
> @@ -426,6 +426,17 @@ vctrap(struct trapframe *frame, int user)
> }
> break;
> }
> + case SVM_VMEXIT_WBINVD:
> + /* There is no special GHCB request for WBNOINVD.
> + * Signal WBINVD to emulate WBNOINVD.
> + */
> + if (*rip == 0xf3)
> + frame->tf_rip += 3;
> + else
> + frame->tf_rip += 2;
> + break;
> + case SVM_VMEXIT_NPF:
> +		panic("unexpected SEV nested page fault");
> default:
> panic("invalid exit code 0x%llx", ghcb_regs.exitcode);
> }
> @@ -436,10 +447,10 @@ vctrap(struct trapframe *frame, int user)
> ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
>
> /* Sync out to GHCB */
> - ghcb = (struct ghcb_sa *)ghcb_vaddr;
> + ghcb = curcpu()->ci_ghcb;
> ghcb_sync_out(frame, &ghcb_regs, ghcb, &syncout);
>
> - wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> + wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
> /* Call hypervisor. */
> vmgexit();
> diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> index 8b82db6b4f6..cbfe817ea9c 100644
> --- a/sys/arch/amd64/amd64/vector.S
> +++ b/sys/arch/amd64/amd64/vector.S
> @@ -590,6 +590,40 @@ KUENTRY(x2apic_eoi)
> lfence
> END(x2apic_eoi)
>
> +/*
> + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> + * needs the kernel GS_BASE. So if we come from userland, we need to
> + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> + * to do it here. In order to detect whether we come from user or kernel
> + * land, this function MUST be called before %rsp is modified.
> + */
> +KUENTRY(x2apic_eoi_swapgs)
> +	/* If we come from userland, do swapgs so that curcpu() works */
> + testb $SEL_RPL,16(%rsp)
> + jz 1f
> + swapgs
> + FENCE_SWAPGS_MIS_TAKEN
> +1:
> + pushq %rax
> + pushq %rcx
> + pushq %rdx
> + mov $MSR_X2APIC_EOI,%ecx
> + mov $0,%eax
> + mov $0,%edx
> + wrmsr
> + popq %rdx
> + popq %rcx
> + popq %rax
> +
> + testb $SEL_RPL,16(%rsp)
> + jz 2f
> + swapgs
> + FENCE_SWAPGS_MIS_TAKEN
> +2:
> + retq
> + lfence
> +END(x2apic_eoi_swapgs)
> +
> #if NLAPIC > 0
> #ifdef MULTIPROCESSOR
> KIDTVEC(recurse_lapic_ipi)
> @@ -629,9 +663,9 @@ END(Xresume_lapic_ipi)
> */
> /* invalidate the entire TLB, no PCIDs version */
> IDTVEC(ipi_invltlb)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> movq %cr3, %rax
> movq %rax, %cr3
> @@ -646,11 +680,11 @@ END(Xipi_invltlb)
> #if NVMM > 0
> /* Invalidate VMX EPT */
> IDTVEC(ipi_invept)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
>
> - ioapic_asm_ack()
> -
> movq $ept_shoot_vid, %rax
> movq ept_shoot_mode, %rdx
> invept (%rax), %rdx
> @@ -666,9 +700,9 @@ END(Xipi_invept)
>
> /* invalidate a single page, no PCIDs version */
> IDTVEC(ipi_invlpg)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> movq tlb_shoot_addr1, %rax
> invlpg (%rax)
> @@ -682,11 +716,11 @@ END(Xipi_invlpg)
>
> /* invalidate a range of pages, no PCIDs version */
> IDTVEC(ipi_invlrange)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
>
> - ioapic_asm_ack()
> -
> movq tlb_shoot_addr1, %rax
> movq tlb_shoot_addr2, %rdx
> 1: invlpg (%rax)
> @@ -706,9 +740,9 @@ END(Xipi_invlrange)
> * Invalidate the userspace PCIDs.
> */
> IDTVEC(ipi_invltlb_pcid)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> /* set the type */
> movl $INVPCID_PCID,%eax
> @@ -740,9 +774,9 @@ END(Xipi_invltlb_pcid)
> * while userspace VAs are present in PCIDs 1 and 2.
> */
> IDTVEC(ipi_invlpg_pcid)
> - pushq %rax
> + ioapic_asm_ack_fast_ipi()
>
> - ioapic_asm_ack()
> + pushq %rax
>
> /* space for the INVPCID descriptor */
> subq $16,%rsp
> @@ -777,12 +811,12 @@ END(Xipi_invlpg_pcid)
> * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
> */
> IDTVEC(ipi_invlrange_pcid)
> + ioapic_asm_ack_fast_ipi()
> +
> pushq %rax
> pushq %rdx
> pushq %rcx
>
> - ioapic_asm_ack()
> -
> /* space for the INVPCID descriptor */
> subq $16,%rsp
>
> @@ -817,7 +851,7 @@ IDTVEC(ipi_invlrange_pcid)
> END(Xipi_invlrange_pcid)
>
> IDTVEC(ipi_wbinvd)
> - ioapic_asm_ack()
> + ioapic_asm_ack_fast_ipi()
>
> wbinvd
>
> diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
> index 2ccb638a8e8..6b6bfee62e1 100644
> --- a/sys/arch/amd64/include/codepatch.h
> +++ b/sys/arch/amd64/include/codepatch.h
> @@ -70,6 +70,7 @@ void codepatch_disable(void);
> #define CPTAG_RETPOLINE_R11 15
> #define CPTAG_RETPOLINE_R13 16
> #define CPTAG_IBPB_NOP 17
> +#define CPTAG_EOI_FAST_IPI 18
>
> /*
> * stac/clac SMAP instructions have lfence like semantics. Let's
> diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
> index 8c71c424a8f..6b725ff796a 100644
> --- a/sys/arch/amd64/include/cpu.h
> +++ b/sys/arch/amd64/include/cpu.h
> @@ -107,6 +107,7 @@ enum cpu_vendor {
> */
> struct x86_64_tss;
> struct vcpu;
> +struct ghcb_sa;
> struct cpu_info {
> /*
> * The beginning of this structure in mapped in the userspace "u-k"
> @@ -219,6 +220,9 @@ struct cpu_info {
> struct uvm_pmr_cache ci_uvm; /* [o] page cache */
> #endif
>
> + struct ghcb_sa *ci_ghcb;
> + paddr_t ci_ghcb_paddr;
> +
> struct ksensordev ci_sensordev;
> struct ksensor ci_sensor;
> struct ksensor ci_hz_sensor;
> diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
> index fb1de0cb1b1..5b2669a36aa 100644
> --- a/sys/arch/amd64/include/cpuvar.h
> +++ b/sys/arch/amd64/include/cpuvar.h
> @@ -71,6 +71,7 @@ struct cpu_functions {
> };
>
> extern struct cpu_functions mp_cpu_funcs;
> +extern struct cpu_functions mp_sev_es_cpu_funcs;
>
> #define CPU_ROLE_SP 0
> #define CPU_ROLE_BP 1
> diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
> index bac63968d24..a39d5a9401f 100644
> --- a/sys/arch/amd64/include/ghcb.h
> +++ b/sys/arch/amd64/include/ghcb.h
> @@ -111,9 +111,6 @@ struct ghcb_sync {
>
> #ifndef _LOCORE
>
> -extern vaddr_t ghcb_vaddr;
> -extern paddr_t ghcb_paddr;
> -
> struct ghcb_extra_regs {
> uint64_t exitcode;
> uint64_t exitinfo1;
> @@ -136,6 +133,9 @@ void ghcb_sync_in(struct trapframe *, struct ghcb_extra_regs *,
> struct ghcb_sa *, struct ghcb_sync *);
> void _ghcb_mem_rw(vaddr_t, int, void *, bool);
> void _ghcb_io_rw(uint16_t, int, uint32_t *, bool);
> +#ifdef MULTIPROCESSOR
> +int ghcb_get_ap_jump_table(paddr_t *);
> +#endif
>
> static inline uint8_t
> ghcb_mem_read_1(vaddr_t addr)
> diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
> index 99b22923499..3288176fb22 100644
> --- a/sys/arch/amd64/include/i82093reg.h
> +++ b/sys/arch/amd64/include/i82093reg.h
> @@ -114,7 +114,21 @@
>
> #include <machine/codepatch.h>
>
> -#define ioapic_asm_ack(num) \
> +/*
> + * This macro must also work if swapgs has not been called on entry
> + * from user land.
> + */
> +#define ioapic_asm_ack_fast_ipi(num) \
> + CODEPATCH_START ;\
> + movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
> + CODEPATCH_END(CPTAG_EOI_FAST_IPI)
> +
> +
> +/*
> + * This macro assumes that swapgs has already been called (e.g. by
> + * INTRENTRY).
> + */
> +#define ioapic_asm_ack(num) \
> CODEPATCH_START ;\
> movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
> CODEPATCH_END(CPTAG_EOI)
> diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
> index 25b1618ad1f..23ee60eb465 100644
> --- a/sys/arch/amd64/include/vmmvar.h
> +++ b/sys/arch/amd64/include/vmmvar.h
> @@ -271,6 +271,7 @@
> */
> #define SEV_VMGEXIT_MMIO_READ 0x80000001
> #define SEV_VMGEXIT_MMIO_WRITE 0x80000002
> +#define SEV_VMGEXIT_AP_JUMP_TABLE 0x80000005
>
> #ifndef _LOCORE
>
> diff --git a/sys/dev/acpi/acpimadt.c b/sys/dev/acpi/acpimadt.c
> index 275f2b1e6ce..f9f3a0a6538 100644
> --- a/sys/dev/acpi/acpimadt.c
> +++ b/sys/dev/acpi/acpimadt.c
> @@ -263,6 +263,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
> caa.cpu_acpi_proc_id = entry->madt_lapic.acpi_proc_id;
> #ifdef MULTIPROCESSOR
> caa.cpu_func = &mp_cpu_funcs;
> +#ifdef __amd64__
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + caa.cpu_func = &mp_sev_es_cpu_funcs;
> +#endif
> #endif
> #ifdef __i386__
> /*
> @@ -318,6 +322,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
> caa.cpu_acpi_proc_id = entry->madt_x2apic.acpi_proc_uid;
> #ifdef MULTIPROCESSOR
> caa.cpu_func = &mp_cpu_funcs;
> +#ifdef __amd64__
> + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> + caa.cpu_func = &mp_sev_es_cpu_funcs;
> +#endif
> #endif
> #ifdef __i386__
> /*
>
>