From: Mark Kettenis
Subject: Re: SEV-ES multiprocessor support
To: Stefan Fritsch
Cc: tech@openbsd.org, mlarkin@nested.page
Date: Thu, 18 Sep 2025 13:22:15 +0200

> Date: Thu, 18 Sep 2025 13:17:32 +0200 (CEST)
> From: Stefan Fritsch
>
> Hi,
>
> one remaining problem with SEV-ES is that we don't support multiprocessor
> yet, and booting openbsd in a SEV-ES VM that has several VCPUs hangs at
>
> cpu3 at mainbus0: apid 3 (application processor)
> cpu3: failed to become ready
> cpu3: failed to identify
>
> Sometimes it continues after some time, but often it does not. I am not
> sure if the problem is on our side or if there is some error handling
> missing in qemu/KVM. Even if it does not hang, some things do not work
> correctly: sysctl hw.ncpu is wrong, top gives warnings, ...
>
> In any case, I think this should be fixed somehow before the release, in
> order to avoid support requests on the lists. There are two ways forward:
>
> 1) try to get SEV-ES MP support finished before the release.
>
> 2) commit some workaround that prevents openbsd from trying to use the
> application processors if SEV-ES is enabled. Likely in cpu_match().
>
> The diff that implements MP support is attached below. With this diff,
> openbsd works for me in a 4 VCPU VM with SEV-ES enabled.
>
> There is also the question whether we actually need MP support for SEV-ES.
> SEV-ES is just an intermediate step and in the end, most people will want
> to use SEV-SNP (supported in Zen 3 Epyc CPUs and later). MP CPU bringup is
> again a bit different with SEV-SNP compared to SEV-ES, though the larger
> part of the diff is needed for both variants. In my opinion, skipping MP
> support for SEV-ES and only implementing it for SEV-SNP later is also an
> option.
>
> I doubt there is enough time for 1). But I could start splitting the diff
> into reviewable parts and we will see how far we get.
>
> What do you think?

I think no.  If it isn't finished, it isn't finished.  There will be
time to do things properly after the tree unlocks.
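For reference, the workaround in 2) could be quite small. A rough,
untested sketch; I'm assuming struct cpu_attach_args carries cpu_role
and that CPU_ROLE_AP marks application processors, and only the
cpu_sev_guestmode/SEV_STAT_ES_ENABLED test is taken from the diff below:

int
cpu_match(struct device *parent, void *match, void *aux)
{
	struct cfdata *cf = match;
	struct cpu_attach_args *caa = aux;

	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
		return 0;

	/*
	 * Hypothetical workaround: do not match application processors
	 * while running as a SEV-ES guest, so only the boot processor
	 * attaches and no AP startup is attempted.
	 */
	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) &&
	    caa->cpu_role == CPU_ROLE_AP)
		return 0;

	return 1;
}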
> diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
> index 2611859f3f5..247f7b8cff1 100644
> --- a/sys/arch/amd64/amd64/cpu.c
> +++ b/sys/arch/amd64/amd64/cpu.c
> @@ -95,6 +95,7 @@
>  #include
>  #include
>  #include
> +#include
>
>  #if NLAPIC > 0
>  #include
> @@ -438,6 +439,10 @@ int mp_cpu_start(struct cpu_info *);
>  void mp_cpu_start_cleanup(struct cpu_info *);
>  struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
>      mp_cpu_start_cleanup };
> +int mp_sev_es_cpu_start(struct cpu_info *);
> +void mp_sev_es_cpu_start_cleanup(struct cpu_info *);
> +struct cpu_functions mp_sev_es_cpu_funcs = { mp_sev_es_cpu_start, NULL,
> +    mp_sev_es_cpu_start_cleanup };
>  #endif /* MULTIPROCESSOR */
>
>  const struct cfattach cpu_ca = {
> @@ -606,6 +611,27 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
>  		ci->ci_tlog_base = malloc(sizeof(struct tlog),
>  		    M_DEVBUF, M_WAITOK);
>  #endif
> +
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
> +			struct ghcb_sa *ghcb_va = NULL;
> +			struct vm_page *ghcb_page;
> +
> +			ghcb_page = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
> +			if (ghcb_page == NULL)
> +				panic("failed to allocate GHCB page");
> +
> +			ghcb_va = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
> +			if (ghcb_va == NULL)
> +				panic("failed to allocate virtual GHCB address");
> +
> +			pmap_kenter_pa((vaddr_t)ghcb_va, ghcb_page->phys_addr | PMAP_NOCRYPT,
> +			    PROT_READ | PROT_WRITE);
> +
> +			ci->ci_ghcb_paddr = ghcb_page->phys_addr;
> +			ci->ci_ghcb = ghcb_va;
> +
> +			memset(ghcb_va, 0, PAGE_SIZE);
> +		}
>  	} else {
>  		ci = &cpu_info_primary;
>  #if defined(MULTIPROCESSOR)
> @@ -1031,6 +1057,24 @@ cpu_hatch(void *v)
>  	struct cpu_info *ci = (struct cpu_info *)v;
>  	int s;
>
> +	/* We need the GSBASE MSR for the vctrap handler to work.
> +	 * CPUID will trap into the #VC trap handler on AMD SEV-ES.
> +	 */
> +	cpu_init_msrs(ci);
> +
> +	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
> +		extern int x2apic_enabled;
> +
> +		/* Load IDT early for #VC handler */
> +		cpu_init_idt();
> +		if (x2apic_enabled) {
> +			/* Now that we have a #VC handler, we are able
> +			 * to enable x2APIC.
> +			 */
> +			wrmsr(MSR_APICBASE, rdmsr(MSR_APICBASE) | APICBASE_ENABLE_X2APIC);
> +		}
> +	}
> +
>  	{
>  		uint32_t vendor[4];
>  		int level;
> @@ -1040,7 +1084,6 @@
>  		cpu_set_vendor(ci, level, (const char *)vendor);
>  	}
>
> -	cpu_init_msrs(ci);
>
>  #ifdef DEBUG
>  	if (ci->ci_flags & CPUF_PRESENT)
> @@ -1205,6 +1248,60 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
>  	outb(IO_RTC, NVRAM_RESET);
>  	outb(IO_RTC+1, NVRAM_RESET_RST);
>  }
> +
> +paddr_t sev_es_jmp_tbl_addr;
> +
> +int mp_sev_es_cpu_start(struct cpu_info *ci)
> +{
> +	struct {
> +		uint16_t reset_ip;
> +		uint16_t reset_cs;
> +	} *jmp_tbl;
> +
> +	if (sev_es_jmp_tbl_addr == 0) {
> +		paddr_t jmp_tbl_paddr;
> +
> +		if (!ghcb_get_ap_jump_table(&jmp_tbl_paddr))
> +			sev_es_jmp_tbl_addr = jmp_tbl_paddr & ~PAGE_MASK;
> +		else
> +			panic("failed to get AP jump table address");
> +
> +		/* Update the AP jump table only once */
> +		jmp_tbl = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
> +		if (jmp_tbl == NULL)
> +			panic("failed to allocate virtual address");
> +
> +		pmap_kenter_pa((vaddr_t)jmp_tbl, sev_es_jmp_tbl_addr,
> +		    PROT_READ | PROT_WRITE);
> +
> +		jmp_tbl->reset_ip = 0;
> +		jmp_tbl->reset_cs = MP_TRAMPOLINE >> 4;
> +
> +		pmap_kremove((vaddr_t)jmp_tbl, PAGE_SIZE);
> +		km_free(jmp_tbl, PAGE_SIZE, &kv_any, &kp_none);
> +	}
> +
> +	if (ci->ci_flags & CPUF_AP) {
> +		x86_ipi_init(ci->ci_apicid);
> +
> +		delay(10000);
> +
> +		if (cpu_feature & CPUID_APIC) {
> +			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
> +			delay(200);
> +
> +			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
> +			delay(200);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +void mp_sev_es_cpu_start_cleanup(struct cpu_info *ci)
> +{
> +	(void)ci;
> +}
>  #endif /* MULTIPROCESSOR */
>
>  typedef void (vector)(void);
> diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
> index 2b0fa809570..aace7f28303 100644
> --- a/sys/arch/amd64/amd64/ghcb.c
> +++ b/sys/arch/amd64/amd64/ghcb.c
> @@ -47,9 +47,6 @@ const uint64_t ghcb_sz_clear_masks[] = {
>  	0xffffffffffffffffULL, 0xffffffffffffffffULL
>  };
>
> -vaddr_t ghcb_vaddr;
> -paddr_t ghcb_paddr;
> -
>  /*
>   * ghcb_clear
>   *
> @@ -254,6 +251,11 @@ ghcb_sync_in(struct trapframe *frame, struct ghcb_extra_regs *regs,
>  		frame->tf_rdx |= (ghcb->v_rdx & ghcb_sz_masks[gsin->sz_d]);
>  	}
>
> +	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO1))
> +		regs->exitinfo1 = ghcb->v_sw_exitinfo1;
> +	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO2))
> +		regs->exitinfo2 = ghcb->v_sw_exitinfo2;
> +
>  	if (regs && regs->data) {
>  		data_sz = regs->data_sz;
>  		KASSERT(data_sz <= sizeof(ghcb->v_sharedbuf));
> @@ -303,14 +305,14 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
>  		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_READ;
>  		ghcb_regs.exitinfo1 = paddr;
>  		ghcb_regs.exitinfo2 = size;
> -		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
> -		    v_sharedbuf);
> +		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
> +		    offsetof(struct ghcb_sa, v_sharedbuf);
>  	} else {
>  		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_WRITE;
>  		ghcb_regs.exitinfo1 = paddr;
>  		ghcb_regs.exitinfo2 = size;
> -		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
> -		    v_sharedbuf);
> +		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
> +		    offsetof(struct ghcb_sa, v_sharedbuf);
>  		ghcb_regs.data = val;
>  		ghcb_regs.data_sz = size;
>  	}
> @@ -322,10 +324,10 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
>
>  	s = intr_disable();
>
> -	ghcb = (struct ghcb_sa *)ghcb_vaddr;
> -	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
> +	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
> -	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> +	ghcb = curcpu()->ci_ghcb;
> +	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
>
>  	vmgexit();
>
> @@ -399,10 +401,10 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
>
>  	s = intr_disable();
>
> -	ghcb = (struct ghcb_sa *)ghcb_vaddr;
> +	ghcb = curcpu()->ci_ghcb;
>  	ghcb_sync_out(&frame, &ghcb_regs, ghcb, &syncout);
>
> -	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> +	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
>  	vmgexit();
>
> @@ -418,3 +420,55 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
>  	if (read)
>  		*val = frame.tf_rax;
>  }
> +
> +#ifdef MULTIPROCESSOR
> +int
> +ghcb_get_ap_jump_table(paddr_t *jmp_tbl_addr)
> +{
> +	struct ghcb_sa *ghcb;
> +	struct ghcb_sync syncout, syncin;
> +	struct ghcb_extra_regs ghcb_regs;
> +	unsigned long s;
> +
> +	memset(&syncout, 0, sizeof(syncout));
> +	memset(&syncin, 0, sizeof(syncin));
> +	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
> +
> +	ghcb_regs.exitcode = SEV_VMGEXIT_AP_JUMP_TABLE;
> +	ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
> +	ghcb_regs.exitinfo1 = 1;	/* GET */
> +	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
> +	ghcb_regs.exitinfo2 = 0;
> +	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
> +
> +	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncin);
> +	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncin);
> +
> +	s = intr_disable();
> +
> +	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
> +
> +	ghcb = curcpu()->ci_ghcb;
> +	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
> +
> +	vmgexit();
> +
> +	if (ghcb_verify_bm(ghcb->valid_bitmap, syncin.valid_bitmap)) {
> +		ghcb_clear(ghcb);
> +		panic("invalid hypervisor response");
> +	}
> +
> +	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
> +
> +	ghcb_sync_in(NULL, &ghcb_regs, ghcb, &syncin);
> +
> +	intr_restore(s);
> +
> +	if (ghcb_regs.exitinfo1 == 0) {
> +		*jmp_tbl_addr = ghcb_regs.exitinfo2;
> +		return 0;
> +	} else {
> +		return 1;
> +	}
> +}
> +#endif
> diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> index f7fdb81ccca..80436294e6f 100644
> --- a/sys/arch/amd64/amd64/lapic.c
> +++ b/sys/arch/amd64/amd64/lapic.c
> @@ -99,6 +99,7 @@ struct pic local_pic = {
>  };
>
>  extern int x2apic_eoi;
> +extern int x2apic_eoi_swapgs;
>  int x2apic_enabled = 0;
>
>  u_int32_t x2apic_readreg(int reg);
> @@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
>  #endif
>  		x2apic_enabled = 1;
>  		codepatch_call(CPTAG_EOI, &x2apic_eoi);
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
> +		else
> +			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
>
>  		va = (vaddr_t)&local_apic;
>  	} else {
> @@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
>  		pte = kvtopte(va);
>  		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
>  		invlpg(va);
> +
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			panic("xAPIC mode not implemented for SEV-ES");
>  	}
>
>  	/*
> diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
> index 4533b19df2f..951da60b1d2 100644
> --- a/sys/arch/amd64/amd64/locore0.S
> +++ b/sys/arch/amd64/amd64/locore0.S
> @@ -804,15 +804,6 @@ longmode_hi:
>  	addq	%rsi,%rdx
>  	movq	%rdx,atdevbase(%rip)
>
> -	/* Relocate GHCB. */
> -	movq	cpu_sev_guestmode(%rip),%rax
> -	testq	$SEV_STAT_ES_ENABLED,%rax
> -	jz	.Lnoghcbreloc
> -	movq	$(PROC0_GHCB_OFF+KERNBASE),%rdx
> -	addq	%rsi,%rdx
> -	movq	%rdx,ghcb_vaddr(%rip)
> -
> -.Lnoghcbreloc:
>  	/* Record start of symbols */
>  	movq	$__kernel_bss_end, ssym(%rip)
>
> diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
> index 3de32b26354..d86ec85db67 100644
> --- a/sys/arch/amd64/amd64/machdep.c
> +++ b/sys/arch/amd64/amd64/machdep.c
> @@ -1342,9 +1342,10 @@ cpu_init_early_vctrap(paddr_t addr)
>  	cpu_init_idt();
>
>  	/* Tell vmm(4) about our GHCB. */
> -	ghcb_paddr = addr;
> -	memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
> -	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> +	cpu_info_primary.ci_ghcb_paddr = addr;
> +	cpu_info_primary.ci_ghcb = (struct ghcb_sa *)(addr + KERNBASE);
> +	memset(cpu_info_primary.ci_ghcb, 0, 2 * PAGE_SIZE);
> +	wrmsr(MSR_SEV_GHCB, cpu_info_primary.ci_ghcb_paddr);
>  }
>
>  void
> @@ -1388,6 +1389,7 @@ map_tramps(void)
>  	extern u_char mp_tramp_data_start[];
>  	extern u_char mp_tramp_data_end[];
>  	extern u_int32_t mp_pdirpa;
> +	extern u_int32_t mp_sev_guestmode;
>  #endif
>
>  	/*
> @@ -1429,6 +1431,13 @@ map_tramps(void)
>  	 */
>  	mp_pdirpa = tramp_pdirpa;
>
> +	/*
> +	 * We need to introduce and set mp_sev_guestmode since the
> +	 * global cpu_sev_guestmode variable may not be accessible in
> +	 * 16 or 32 bit mode.
> +	 */
> +	mp_sev_guestmode = cpu_sev_guestmode;
> +
>  	/* Unmap, will be remapped in cpu_start_secondary */
>  	pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
>  	pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
> diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S
> index 96247c8e890..838168843bf 100644
> --- a/sys/arch/amd64/amd64/mptramp.S
> +++ b/sys/arch/amd64/amd64/mptramp.S
> @@ -143,6 +143,14 @@ _TRMP_LABEL(.Lmp_startup)
>  	rdmsr
>  	movl	%edx, %edi	# %edx is needed by wrmsr below
>
> +	# If SEV is enabled, we can assume that NXE is supported and we cannot
> +	# do cpuid, yet.
> +	movl	$mp_sev_guestmode, %edx
> +	movl	(%edx), %edx
> +	xorl	%eax, %eax
> +	testl	%edx, %edx
> +	jnz	4f
> +
>  	# Check if we need to enable NXE
>  	movl	$0x80000001, %eax
>  	cpuid
> @@ -150,6 +158,7 @@
>  	xorl	%eax,%eax
>  	testl	%edx, %edx
>  	jz	1f
> +4:
>  	orl	$EFER_NXE, %eax
>  1:
>  	orl	$(EFER_LME|EFER_SCE), %eax
> @@ -192,6 +201,31 @@ END(cpu_spinup_trampoline)
>
>  	.text
>  GENTRY(cpu_spinup_finish)
> +	movl	$mp_sev_guestmode, %eax
> +	movl	(%eax), %eax
> +	testl	$SEV_STAT_ES_ENABLED, %eax
> +	jz	5f
> +
> +	# We are in SEV-ES mode. MSR or MMIO access is only possible
> +	# through a GHCB. Query APIC ID via CPUID leaf 1 EBX
> +	movl	$1, %edx
> +	# EBX == 1, function 4 cpuid request
> +	movl	$(1 << 30 | 4), %eax
> +	movl	$MSR_SEV_GHCB, %ecx
> +	wrmsr
> +	rep vmmcall
> +	rdmsr
> +	# Make sure the query was successful
> +	cmpl	$(1 << 30 | 5), %eax
> +	jne	.Lsev_es_terminate
> +
> +	movl	%edx, %eax
> +	shrl	$24, %eax
> +	# Skip x2apic initialization if running on SEV-ES or higher.
> +	# We cannot do rdmsr/wrmsr without a GHCB. Will be done later in cpu_hatch.
> +	jmp	2f
> +
> +5:
>  	movl	x2apic_enabled,%eax
>  	testl	%eax,%eax
>  	jz	1f
> @@ -234,9 +268,18 @@ GENTRY(cpu_spinup_finish)
>  	movq	%rax,%cr0
>  	call	cpu_hatch
>  	movq	$0,-8(%rsp)
> -END(cpu_spinup_finish)
>  	/* NOTREACHED */
>
> +.Lsev_es_terminate:
> +	xorl	%edx, %edx
> +	movl	$0x100, %eax
> +	movl	$MSR_SEV_GHCB, %ecx
> +	wrmsr
> +	rep vmmcall
> +	hlt
> +	jmp	.Lsev_es_terminate
> +END(cpu_spinup_finish)
> +
>  	.section .rodata
>  	.type	mp_tramp_data_start,@object
>  mp_tramp_data_start:
> @@ -250,6 +293,12 @@ _TRMP_DATA_LABEL(mp_pdirpa)
>  	.long	0
>  	.size	mp_pdirpa,4
>
> +	.global	mp_sev_guestmode
> +	.type	mp_sev_guestmode,@object
> +_TRMP_DATA_LABEL(mp_sev_guestmode)
> +	.long	0
> +	.size	mp_sev_guestmode,4
> +
>
>  _TRMP_DATA_LABEL(.Lmptramp_gdt32)
>  	.quad	0x0000000000000000
> diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
> index 396366de89b..49c7c0ffb70 100644
> --- a/sys/arch/amd64/amd64/trap.c
> +++ b/sys/arch/amd64/amd64/trap.c
> @@ -426,6 +426,17 @@ vctrap(struct trapframe *frame, int user)
>  		}
>  		break;
>  	}
> +	case SVM_VMEXIT_WBINVD:
> +		/* There is no special GHCB request for WBNOINVD.
> +		 * Signal WBINVD to emulate WBNOINVD.
> +		 */
> +		if (*rip == 0xf3)
> +			frame->tf_rip += 3;
> +		else
> +			frame->tf_rip += 2;
> +		break;
> +	case SVM_VMEXIT_NPF:
> +		panic("Unexpected SEV nested page fault");
>  	default:
>  		panic("invalid exit code 0x%llx", ghcb_regs.exitcode);
>  	}
> @@ -436,10 +447,10 @@ vctrap(struct trapframe *frame, int user)
>  	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
>
>  	/* Sync out to GHCB */
> -	ghcb = (struct ghcb_sa *)ghcb_vaddr;
> +	ghcb = curcpu()->ci_ghcb;
>  	ghcb_sync_out(frame, &ghcb_regs, ghcb, &syncout);
>
> -	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
> +	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
>
>  	/* Call hypervisor. */
>  	vmgexit();
> diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> index 8b82db6b4f6..cbfe817ea9c 100644
> --- a/sys/arch/amd64/amd64/vector.S
> +++ b/sys/arch/amd64/amd64/vector.S
> @@ -590,6 +590,40 @@ KUENTRY(x2apic_eoi)
>  	lfence
>  END(x2apic_eoi)
>
> +/*
> + * With SEV-ES the wrmsr instruction traps into the #VC handler which
> + * needs the kernel GS_BASE. So if we come from userland, we need to
> + * do swapgs. The fast IPI handler does not perform swapgs, so we need
> + * to do it here. In order to detect whether we come from user or kernel
> + * land, this function MUST be called before %rsp is modified.
> + */
> +KUENTRY(x2apic_eoi_swapgs)
> +	/* If we come from userland, do swapgs to enable curcpu() */
> +	testb	$SEL_RPL,16(%rsp)
> +	jz	1f
> +	swapgs
> +	FENCE_SWAPGS_MIS_TAKEN
> +1:
> +	pushq	%rax
> +	pushq	%rcx
> +	pushq	%rdx
> +	mov	$MSR_X2APIC_EOI,%ecx
> +	mov	$0,%eax
> +	mov	$0,%edx
> +	wrmsr
> +	popq	%rdx
> +	popq	%rcx
> +	popq	%rax
> +
> +	testb	$SEL_RPL,16(%rsp)
> +	jz	2f
> +	swapgs
> +	FENCE_SWAPGS_MIS_TAKEN
> +2:
> +	retq
> +	lfence
> +END(x2apic_eoi_swapgs)
> +
>  #if NLAPIC > 0
>  #ifdef MULTIPROCESSOR
>  KIDTVEC(recurse_lapic_ipi)
> @@ -629,9 +663,9 @@ END(Xresume_lapic_ipi)
>   */
>  /* invalidate the entire TLB, no PCIDs version */
>  IDTVEC(ipi_invltlb)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	movq	%cr3, %rax
>  	movq	%rax, %cr3
> @@ -646,11 +680,11 @@ END(Xipi_invltlb)
>
>  #if NVMM > 0
>  /* Invalidate VMX EPT */
>  IDTVEC(ipi_invept)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>
> -	ioapic_asm_ack()
> -
>  	movq	$ept_shoot_vid, %rax
>  	movq	ept_shoot_mode, %rdx
>  	invept	(%rax), %rdx
> @@ -666,9 +700,9 @@ END(Xipi_invept)
>
>  /* invalidate a single page, no PCIDs version */
>  IDTVEC(ipi_invlpg)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	movq	tlb_shoot_addr1, %rax
>  	invlpg	(%rax)
> @@ -682,11 +716,11 @@ END(Xipi_invlpg)
>
>  /* invalidate a range of pages, no PCIDs version */
>  IDTVEC(ipi_invlrange)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>
> -	ioapic_asm_ack()
> -
>  	movq	tlb_shoot_addr1, %rax
>  	movq	tlb_shoot_addr2, %rdx
>  1:	invlpg	(%rax)
> @@ -706,9 +740,9 @@ END(Xipi_invlrange)
>   *
>   * Invalidate the userspace PCIDs.
>   */
>  IDTVEC(ipi_invltlb_pcid)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	/* set the type */
>  	movl	$INVPCID_PCID,%eax
> @@ -740,9 +774,9 @@ END(Xipi_invltlb_pcid)
>   * while userspace VAs are present in PCIDs 1 and 2.
>   */
>  IDTVEC(ipi_invlpg_pcid)
> -	pushq	%rax
> +	ioapic_asm_ack_fast_ipi()
>
> -	ioapic_asm_ack()
> +	pushq	%rax
>
>  	/* space for the INVPCID descriptor */
>  	subq	$16,%rsp
> @@ -777,12 +811,12 @@ END(Xipi_invlpg_pcid)
>   * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
>   */
>  IDTVEC(ipi_invlrange_pcid)
> +	ioapic_asm_ack_fast_ipi()
> +
>  	pushq	%rax
>  	pushq	%rdx
>  	pushq	%rcx
>
> -	ioapic_asm_ack()
> -
>  	/* space for the INVPCID descriptor */
>  	subq	$16,%rsp
>
> @@ -817,7 +851,7 @@ IDTVEC(ipi_invlrange_pcid)
>  END(Xipi_invlrange_pcid)
>
>  IDTVEC(ipi_wbinvd)
> -	ioapic_asm_ack()
> +	ioapic_asm_ack_fast_ipi()
>
>  	wbinvd
>
> diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
> index 2ccb638a8e8..6b6bfee62e1 100644
> --- a/sys/arch/amd64/include/codepatch.h
> +++ b/sys/arch/amd64/include/codepatch.h
> @@ -70,6 +70,7 @@ void codepatch_disable(void);
>  #define CPTAG_RETPOLINE_R11	15
>  #define CPTAG_RETPOLINE_R13	16
>  #define CPTAG_IBPB_NOP		17
> +#define CPTAG_EOI_FAST_IPI	18
>
>  /*
>   * stac/clac SMAP instructions have lfence like semantics.  Let's
> diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
> index 8c71c424a8f..6b725ff796a 100644
> --- a/sys/arch/amd64/include/cpu.h
> +++ b/sys/arch/amd64/include/cpu.h
> @@ -107,6 +107,7 @@ enum cpu_vendor {
>   */
>  struct x86_64_tss;
>  struct vcpu;
> +struct ghcb_sa;
>  struct cpu_info {
>  	/*
>  	 * The beginning of this structure in mapped in the userspace "u-k"
> @@ -219,6 +220,9 @@ struct cpu_info {
>  	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
>  #endif
>
> +	struct ghcb_sa	*ci_ghcb;
> +	paddr_t		ci_ghcb_paddr;
> +
>  	struct ksensordev	ci_sensordev;
>  	struct ksensor		ci_sensor;
>  	struct ksensor		ci_hz_sensor;
> diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
> index fb1de0cb1b1..5b2669a36aa 100644
> --- a/sys/arch/amd64/include/cpuvar.h
> +++ b/sys/arch/amd64/include/cpuvar.h
> @@ -71,6 +71,7 @@ struct cpu_functions {
>  };
>
>  extern struct cpu_functions mp_cpu_funcs;
> +extern struct cpu_functions mp_sev_es_cpu_funcs;
>
>  #define CPU_ROLE_SP	0
>  #define CPU_ROLE_BP	1
> diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
> index bac63968d24..a39d5a9401f 100644
> --- a/sys/arch/amd64/include/ghcb.h
> +++ b/sys/arch/amd64/include/ghcb.h
> @@ -111,9 +111,6 @@ struct ghcb_sync {
>
>  #ifndef _LOCORE
>
> -extern vaddr_t ghcb_vaddr;
> -extern paddr_t ghcb_paddr;
> -
>  struct ghcb_extra_regs {
>  	uint64_t	exitcode;
>  	uint64_t	exitinfo1;
> @@ -136,6 +133,9 @@ void ghcb_sync_in(struct trapframe *, struct ghcb_extra_regs *,
>      struct ghcb_sa *, struct ghcb_sync *);
>  void _ghcb_mem_rw(vaddr_t, int, void *, bool);
>  void _ghcb_io_rw(uint16_t, int, uint32_t *, bool);
> +#ifdef MULTIPROCESSOR
> +int ghcb_get_ap_jump_table(paddr_t *);
> +#endif
>
>  static inline uint8_t
>  ghcb_mem_read_1(vaddr_t addr)
> diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
> index 99b22923499..3288176fb22 100644
> --- a/sys/arch/amd64/include/i82093reg.h
> +++ b/sys/arch/amd64/include/i82093reg.h
> @@ -114,7 +114,21 @@
>
>  #include
>
> -#define ioapic_asm_ack(num) \
> +/*
> + * This macro must also work if swapgs has not been called on entry
> + * from user land.
> + */
> +#define ioapic_asm_ack_fast_ipi(num) \
> +	CODEPATCH_START			;\
> +	movl	$0,(local_apic+LAPIC_EOI)(%rip)	;\
> +	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
> +
> +
> +/*
> + * This macro assumes that swapgs has already been called (e.g. by
> + * INTRENTRY).
> + */
> +#define ioapic_asm_ack(num) \
>  	CODEPATCH_START			;\
>  	movl	$0,(local_apic+LAPIC_EOI)(%rip)	;\
>  	CODEPATCH_END(CPTAG_EOI)
> diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
> index 25b1618ad1f..23ee60eb465 100644
> --- a/sys/arch/amd64/include/vmmvar.h
> +++ b/sys/arch/amd64/include/vmmvar.h
> @@ -271,6 +271,7 @@
>   */
>  #define SEV_VMGEXIT_MMIO_READ		0x80000001
>  #define SEV_VMGEXIT_MMIO_WRITE		0x80000002
> +#define SEV_VMGEXIT_AP_JUMP_TABLE	0x80000005
>
>  #ifndef _LOCORE
>
> diff --git a/sys/dev/acpi/acpimadt.c b/sys/dev/acpi/acpimadt.c
> index 275f2b1e6ce..f9f3a0a6538 100644
> --- a/sys/dev/acpi/acpimadt.c
> +++ b/sys/dev/acpi/acpimadt.c
> @@ -263,6 +263,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
>  		caa.cpu_acpi_proc_id = entry->madt_lapic.acpi_proc_id;
>  #ifdef MULTIPROCESSOR
>  		caa.cpu_func = &mp_cpu_funcs;
> +#ifdef __amd64__
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			caa.cpu_func = &mp_sev_es_cpu_funcs;
> +#endif
>  #endif
>  #ifdef __i386__
>  		/*
> @@ -318,6 +322,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
>  		caa.cpu_acpi_proc_id = entry->madt_x2apic.acpi_proc_uid;
>  #ifdef MULTIPROCESSOR
>  		caa.cpu_func = &mp_cpu_funcs;
> +#ifdef __amd64__
> +		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
> +			caa.cpu_func = &mp_sev_es_cpu_funcs;
> +#endif
>  #endif
>  #ifdef __i386__
>  		/*
>
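A note for reviewers of the mptramp.S change: until a GHCB page has been
registered, a SEV-ES guest can only talk to the hypervisor through the
GHCB MSR protocol, which is what the (1 << 30 | 4) and (1 << 30 | 5)
constants above encode: the CPUID leaf goes in the upper 32 bits, the
register selector (EBX == 1) in bits 31:30, and the request/response
code in the low bits. A C sketch of the same handshake, for
illustration only; sev_ghcb_msr_exchange() is a hypothetical helper
wrapping the wrmsr/VMGEXIT/rdmsr sequence, and the GHCB_MSR_* names are
not from the tree:

/* Hypothetical helper: write MSR_SEV_GHCB, do VMGEXIT, read it back. */
extern uint64_t sev_ghcb_msr_exchange(uint64_t);

#define GHCB_MSR_CPUID_REQ	0x004		/* GHCBInfo: CPUID request */
#define GHCB_MSR_CPUID_RESP	0x005		/* GHCBInfo: CPUID response */
#define GHCB_MSR_CPUID_EBX	(1UL << 30)	/* register selector: EBX */

uint32_t
sev_es_query_apicid(void)
{
	uint64_t req, resp;

	/* Ask for CPUID leaf 1, register EBX. */
	req = ((uint64_t)1 << 32) | GHCB_MSR_CPUID_EBX | GHCB_MSR_CPUID_REQ;
	resp = sev_ghcb_msr_exchange(req);

	/* The low dword must echo the selector plus the response code. */
	if ((uint32_t)resp != (GHCB_MSR_CPUID_EBX | GHCB_MSR_CPUID_RESP))
		panic("GHCB MSR-protocol CPUID failed");

	/* EBX comes back in bits 63:32; the initial APIC ID is EBX[31:24]. */
	return (uint32_t)(resp >> 32) >> 24;
}

This mirrors what cpu_spinup_finish does before the per-CPU GHCB set up
in cpu_attach() is available.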