From: Stefan Fritsch
Subject: SEV-ES multiprocessor support
To: tech@openbsd.org
Cc: Mike Larkin
Date: Thu, 18 Sep 2025 13:17:32 +0200

Hi,

one remaining problem with SEV-ES is that we don't support multiprocessor
operation yet. Booting OpenBSD in a SEV-ES VM that has several VCPUs hangs
at:

  cpu3 at mainbus0: apid 3 (application processor)
  cpu3: failed to become ready
  cpu3: failed to identify

Sometimes it continues after some time, but often it does not. I am not
sure if the problem is on our side or if there is some error handling
missing in qemu/KVM. Even if it does not hang, some things do not work
correctly: sysctl hw.ncpu reports a wrong value, top(1) gives warnings, ...

In any case, I think this should be fixed somehow before the release, in
order to avoid support requests on the lists. There are two ways forward:

1) Try to get SEV-ES MP support finished before the release.

2) Commit some workaround that prevents OpenBSD from trying to use the
   application processors if SEV-ES is enabled. Likely in cpu_match();
   see the sketch below.
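A minimal, untested sketch of that workaround (this assumes that
caa->cpu_role and CPU_ROLE_AP from cpuvar.h are usable in cpu_match(),
which I have not verified; the existing driver name check is kept):

	int
	cpu_match(struct device *parent, void *match, void *aux)
	{
		struct cfdata *cf = match;
		struct cpu_attach_args *caa = aux;

		if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
			return 0;

		/* No SEV-ES MP support yet: refuse to match the APs. */
		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) &&
		    caa->cpu_role == CPU_ROLE_AP)
			return 0;

		return 1;
	}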
The diff that implements MP support is attached below. With this diff,
OpenBSD works for me in a 4 VCPU VM with SEV-ES enabled.

There is also the question whether we actually need MP support for SEV-ES.
SEV-ES is just an intermediate step and in the end, most people will want
to use SEV-SNP (supported on Zen 3 EPYC CPUs and later). MP CPU bringup is
again a bit different with SEV-SNP compared to SEV-ES, though the larger
part of the diff is needed for both variants. In my opinion, skipping MP
support for SEV-ES and only implementing it for SEV-SNP later is also an
option.

I doubt there is enough time for 1). But I could start splitting the diff
into reviewable parts and we will see how far we get.

What do you think?

Cheers,
Stefan


diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 2611859f3f5..247f7b8cff1 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -95,6 +95,7 @@
 #include
 #include
 #include
+#include

 #if NLAPIC > 0
 #include
@@ -438,6 +439,10 @@
 int mp_cpu_start(struct cpu_info *);
 void mp_cpu_start_cleanup(struct cpu_info *);
 struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, mp_cpu_start_cleanup };
+int mp_sev_es_cpu_start(struct cpu_info *);
+void mp_sev_es_cpu_start_cleanup(struct cpu_info *);
+struct cpu_functions mp_sev_es_cpu_funcs = { mp_sev_es_cpu_start, NULL,
+	mp_sev_es_cpu_start_cleanup };
 #endif /* MULTIPROCESSOR */

 const struct cfattach cpu_ca = {
@@ -606,6 +611,27 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
 		ci->ci_tlog_base = malloc(sizeof(struct tlog),
 		    M_DEVBUF, M_WAITOK);
 #endif
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+			struct ghcb_sa *ghcb_va = NULL;
+			struct vm_page *ghcb_page;
+
+			ghcb_page = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+			if (ghcb_page == NULL)
+				panic("failed to allocate GHCB page");
+
+			ghcb_va = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+			if (ghcb_va == NULL)
+				panic("failed to allocate virtual GHCB address");
+
+			pmap_kenter_pa((vaddr_t)ghcb_va, ghcb_page->phys_addr | PMAP_NOCRYPT,
+			    PROT_READ | PROT_WRITE);
+
+			ci->ci_ghcb_paddr = ghcb_page->phys_addr;
+			ci->ci_ghcb = ghcb_va;
+
+			memset(ghcb_va, 0, PAGE_SIZE);
+		}
 	} else {
 		ci = &cpu_info_primary;
 #if defined(MULTIPROCESSOR)
@@ -1031,6 +1057,24 @@ cpu_hatch(void *v)
 	struct cpu_info *ci = (struct cpu_info *)v;
 	int s;

+	/* We need the GSBASE MSR for the vctrap handler to work.
+	 * CPUID will trap into the #VC trap handler on AMD SEV-ES.
+	 */
+	cpu_init_msrs(ci);
+
+	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+		extern int x2apic_enabled;
+
+		/* Load IDT early for #VC handler */
+		cpu_init_idt();
+		if (x2apic_enabled) {
+			/* Now that we have a #VC handler, we are able
+			 * to enable x2APIC.
+			 */
+			wrmsr(MSR_APICBASE, rdmsr(MSR_APICBASE) | APICBASE_ENABLE_X2APIC);
+		}
+	}
+
 	{
 		uint32_t vendor[4];
 		int level;
@@ -1040,7 +1084,6 @@ cpu_hatch(void *v)
 		cpu_set_vendor(ci, level, (const char *)vendor);
 	}

-	cpu_init_msrs(ci);

 #ifdef DEBUG
 	if (ci->ci_flags & CPUF_PRESENT)
@@ -1205,6 +1248,60 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
 	outb(IO_RTC, NVRAM_RESET);
 	outb(IO_RTC+1, NVRAM_RESET_RST);
 }
+
+paddr_t sev_es_jmp_tbl_addr;
+
+int mp_sev_es_cpu_start(struct cpu_info *ci)
+{
+	struct {
+		uint16_t reset_ip;
+		uint16_t reset_cs;
+	} *jmp_tbl;
+
+	if (sev_es_jmp_tbl_addr == 0) {
+		paddr_t jmp_tbl_paddr;
+
+		if (!ghcb_get_ap_jump_table(&jmp_tbl_paddr))
+			sev_es_jmp_tbl_addr = jmp_tbl_paddr & ~PAGE_MASK;
+		else
+			panic("failed to get AP jump table address");
+
+		/* Update the AP jump table only once */
+		jmp_tbl = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+		if (jmp_tbl == NULL)
+			panic("failed to allocate virtual address");
+
+		pmap_kenter_pa((vaddr_t)jmp_tbl, sev_es_jmp_tbl_addr,
+		    PROT_READ | PROT_WRITE);
+
+		jmp_tbl->reset_ip = 0;
+		jmp_tbl->reset_cs = MP_TRAMPOLINE >> 4;
+
+		pmap_kremove((vaddr_t)jmp_tbl, PAGE_SIZE);
+		km_free(jmp_tbl, PAGE_SIZE, &kv_any, &kp_none);
+	}
+
+	if (ci->ci_flags & CPUF_AP) {
+		x86_ipi_init(ci->ci_apicid);
+
+		delay(10000);
+
+		if (cpu_feature & CPUID_APIC) {
+			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+			delay(200);
+
+			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+			delay(200);
+		}
+	}
+
+	return 0;
+}
+
+void mp_sev_es_cpu_start_cleanup(struct cpu_info *ci)
+{
+	(void)ci;
+}
 #endif /* MULTIPROCESSOR */

 typedef void (vector)(void);
diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
index 2b0fa809570..aace7f28303 100644
--- a/sys/arch/amd64/amd64/ghcb.c
+++ b/sys/arch/amd64/amd64/ghcb.c
@@ -47,9 +47,6 @@ const uint64_t ghcb_sz_clear_masks[] = {
 	0xffffffffffffffffULL,
 	0xffffffffffffffffULL
 };
-vaddr_t ghcb_vaddr;
-paddr_t ghcb_paddr;
-
 /*
  * ghcb_clear
  *
@@ -254,6 +251,11 @@ ghcb_sync_in(struct trapframe *frame, struct ghcb_extra_regs *regs,
 		frame->tf_rdx |= (ghcb->v_rdx & ghcb_sz_masks[gsin->sz_d]);
 	}

+	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO1))
+		regs->exitinfo1 = ghcb->v_sw_exitinfo1;
+	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO2))
+		regs->exitinfo2 = ghcb->v_sw_exitinfo2;
+
 	if (regs && regs->data) {
 		data_sz = regs->data_sz;
 		KASSERT(data_sz <= sizeof(ghcb->v_sharedbuf));
@@ -303,14 +305,14 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
 		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_READ;
 		ghcb_regs.exitinfo1 = paddr;
 		ghcb_regs.exitinfo2 = size;
-		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
-		    v_sharedbuf);
+		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+		    offsetof(struct ghcb_sa, v_sharedbuf);
 	} else {
 		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_WRITE;
 		ghcb_regs.exitinfo1 = paddr;
 		ghcb_regs.exitinfo2 = size;
-		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
-		    v_sharedbuf);
+		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+		    offsetof(struct ghcb_sa, v_sharedbuf);
 		ghcb_regs.data = val;
 		ghcb_regs.data_sz = size;
 	}
@@ -322,10 +324,10 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)

 	s = intr_disable();

-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
-	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);

-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	ghcb = curcpu()->ci_ghcb;
+	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);

 	vmgexit();
@@ -399,10 +401,10 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)

 	s = intr_disable();

-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
+	ghcb = curcpu()->ci_ghcb;
 	ghcb_sync_out(&frame, &ghcb_regs, ghcb, &syncout);

-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);

 	vmgexit();
@@ -418,3 +420,55 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
 	if (read)
 		*val = frame.tf_rax;
 }
+
+#ifdef MULTIPROCESSOR
+int
+ghcb_get_ap_jump_table(paddr_t *jmp_tbl_addr)
+{
+	struct ghcb_sa *ghcb;
+	struct ghcb_sync syncout, syncin;
+	struct ghcb_extra_regs ghcb_regs;
+	unsigned long s;
+
+	memset(&syncout, 0, sizeof(syncout));
+	memset(&syncin, 0, sizeof(syncin));
+	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+	ghcb_regs.exitcode = SEV_VMGEXIT_AP_JUMP_TABLE;
+	ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
+	ghcb_regs.exitinfo1 = 1;	// GET
+	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
+	ghcb_regs.exitinfo2 = 0;
+	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
+
+	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncin);
+	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncin);
+
+	s = intr_disable();
+
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
+
+	ghcb = curcpu()->ci_ghcb;
+	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+
+	vmgexit();
+
+	if (ghcb_verify_bm(ghcb->valid_bitmap, syncin.valid_bitmap)) {
+		ghcb_clear(ghcb);
+		panic("invalid hypervisor response");
+	}
+
+	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+	ghcb_sync_in(NULL, &ghcb_regs, ghcb, &syncin);
+
+	intr_restore(s);
+
+	if (ghcb_regs.exitinfo1 == 0) {
+		*jmp_tbl_addr = ghcb_regs.exitinfo2;
+		return 0;
+	} else {
+		return 1;
+	}
+}
+#endif
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index f7fdb81ccca..80436294e6f 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
 };

 extern int x2apic_eoi;
+extern int x2apic_eoi_swapgs;
 int x2apic_enabled = 0;

 u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
 #endif
 		x2apic_enabled = 1;
 		codepatch_call(CPTAG_EOI, &x2apic_eoi);
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
+		else
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);

 		va = (vaddr_t)&local_apic;
 	} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
 		pte = kvtopte(va);
 		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
 		invlpg(va);
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			panic("xAPIC mode not implemented for SEV-ES");
 	}

 	/*
diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
index 4533b19df2f..951da60b1d2 100644
--- a/sys/arch/amd64/amd64/locore0.S
+++ b/sys/arch/amd64/amd64/locore0.S
@@ -804,15 +804,6 @@ longmode_hi:
 	addq	%rsi,%rdx
 	movq	%rdx,atdevbase(%rip)

-	/* Relocate GHCB. */
-	movq	cpu_sev_guestmode(%rip),%rax
-	testq	$SEV_STAT_ES_ENABLED,%rax
-	jz	.Lnoghcbreloc
-	movq	$(PROC0_GHCB_OFF+KERNBASE),%rdx
-	addq	%rsi,%rdx
-	movq	%rdx,ghcb_vaddr(%rip)
-
-.Lnoghcbreloc:
 	/* Record start of symbols */
 	movq	$__kernel_bss_end, ssym(%rip)
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 3de32b26354..d86ec85db67 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -1342,9 +1342,10 @@ cpu_init_early_vctrap(paddr_t addr)
 	cpu_init_idt();

 	/* Tell vmm(4) about our GHCB. */
-	ghcb_paddr = addr;
-	memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	cpu_info_primary.ci_ghcb_paddr = addr;
+	cpu_info_primary.ci_ghcb = (struct ghcb_sa *)(addr + KERNBASE);
+	memset(cpu_info_primary.ci_ghcb, 0, 2 * PAGE_SIZE);
+	wrmsr(MSR_SEV_GHCB, cpu_info_primary.ci_ghcb_paddr);
 }

 void
@@ -1388,6 +1389,7 @@ map_tramps(void)
 	extern u_char mp_tramp_data_start[];
 	extern u_char mp_tramp_data_end[];
 	extern u_int32_t mp_pdirpa;
+	extern u_int32_t mp_sev_guestmode;
 #endif

 	/*
@@ -1429,6 +1431,13 @@ map_tramps(void)
 	 */
 	mp_pdirpa = tramp_pdirpa;

+	/*
+	 * We need to introduce and set mp_sev_guestmode since the
+	 * global cpu_sev_guestmode variable may not be accessible in
+	 * 16- or 32-bit mode.
+	 */
+	mp_sev_guestmode = cpu_sev_guestmode;
+
 	/* Unmap, will be remapped in cpu_start_secondary */
 	pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
 	pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S
index 96247c8e890..838168843bf 100644
--- a/sys/arch/amd64/amd64/mptramp.S
+++ b/sys/arch/amd64/amd64/mptramp.S
@@ -143,6 +143,14 @@ _TRMP_LABEL(.Lmp_startup)
 	rdmsr
 	movl	%edx, %edi	# %edx is needed by wrmsr below

+	# If SEV is enabled, we can assume that NXE is supported and we
+	# cannot do cpuid yet.
+	movl	$mp_sev_guestmode, %edx
+	movl	(%edx), %edx
+	xorl	%eax, %eax
+	testl	%edx, %edx
+	jnz	4f
+
 	# Check if we need to enable NXE
 	movl	$0x80000001, %eax
 	cpuid
@@ -150,6 +158,7 @@ _TRMP_LABEL(.Lmp_startup)
 	xorl	%eax,%eax
 	testl	%edx, %edx
 	jz	1f
+4:
 	orl	$EFER_NXE, %eax
 1:
 	orl	$(EFER_LME|EFER_SCE), %eax
@@ -192,6 +201,31 @@ END(cpu_spinup_trampoline)

 	.text
 GENTRY(cpu_spinup_finish)
+	movl	$mp_sev_guestmode, %eax
+	movl	(%eax), %eax
+	testl	$SEV_STAT_ES_ENABLED, %eax
+	jz	5f
+
+	# We are in SEV-ES mode.  MSR or MMIO access is only possible
+	# through a GHCB.  Query the APIC ID via CPUID leaf 1, EBX.
+	movl	$1, %edx
+	# EBX == register 1, function 4: cpuid request
+	movl	$(1 << 30 | 4), %eax
+	movl	$MSR_SEV_GHCB, %ecx
+	wrmsr
+	rep vmmcall
+	rdmsr
+	# Make sure the query was successful
+	cmpl	$(1 << 30 | 5), %eax
+	jne	.Lsev_es_terminate
+
+	movl	%edx, %eax
+	shrl	$24, %eax
+	# Skip x2apic initialization if running on SEV-ES or higher.  We
+	# cannot do rdmsr/wrmsr without a GHCB; it is done later in cpu_hatch.
+	jmp	2f
+
+5:
 	movl	x2apic_enabled,%eax
 	testl	%eax,%eax
 	jz	1f
@@ -234,9 +268,18 @@ GENTRY(cpu_spinup_finish)
 	movq	%rax,%cr0
 	call	cpu_hatch
 	movq	$0,-8(%rsp)
-END(cpu_spinup_finish)
 	/* NOTREACHED */
+
+.Lsev_es_terminate:
+	xorl	%edx, %edx
+	movl	$0x100, %eax
+	movl	$MSR_SEV_GHCB, %ecx
+	wrmsr
+	rep vmmcall
+	hlt
+	jmp	.Lsev_es_terminate
+END(cpu_spinup_finish)
+
 	.section .rodata
 	.type	mp_tramp_data_start,@object
 mp_tramp_data_start:
@@ -250,6 +293,12 @@ _TRMP_DATA_LABEL(mp_pdirpa)
 	.long	0
 	.size	mp_pdirpa,4

+	.global	mp_sev_guestmode
+	.type	mp_sev_guestmode,@object
+_TRMP_DATA_LABEL(mp_sev_guestmode)
+	.long	0
+	.size	mp_sev_guestmode,4
+
 _TRMP_DATA_LABEL(.Lmptramp_gdt32)
 	.quad	0x0000000000000000
diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 396366de89b..49c7c0ffb70 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -426,6 +426,17 @@ vctrap(struct trapframe *frame, int user)
 		}
 		break;
 	}
+	case SVM_VMEXIT_WBINVD:
+		/* There is no special GHCB request for WBNOINVD.
+		 * Signal WBINVD to emulate WBNOINVD.
+		 */
+		if (*rip == 0xf3)
+			frame->tf_rip += 3;
+		else
+			frame->tf_rip += 2;
+		break;
+	case SVM_VMEXIT_NPF:
+		panic("Unexpected SEV nested page fault");
 	default:
 		panic("invalid exit code 0x%llx", ghcb_regs.exitcode);
 	}
@@ -436,10 +447,10 @@ vctrap(struct trapframe *frame, int user)
 	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);

 	/* Sync out to GHCB */
-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
+	ghcb = curcpu()->ci_ghcb;
 	ghcb_sync_out(frame, &ghcb_regs, ghcb, &syncout);

-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);

 	/* Call hypervisor. */
 	vmgexit();
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 8b82db6b4f6..cbfe817ea9c 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -590,6 +590,40 @@ KUENTRY(x2apic_eoi)
 	lfence
 END(x2apic_eoi)

+/*
+ * With SEV-ES, the wrmsr instruction traps into the #VC handler, which
+ * needs the kernel GS_BASE.  So if we come from userland, we need to
+ * do swapgs.  The fast IPI handlers do not perform swapgs, so we need
+ * to do it here.  To detect whether we come from user or kernel land,
+ * this function MUST be called before %rsp is modified.
+ */
+ */ +KUENTRY(x2apic_eoi_swapgs) + /* If the come from userland, go swapgs to enabled curcpu() */ + testb $SEL_RPL,16(%rsp) + jz 1f + swapgs + FENCE_SWAPGS_MIS_TAKEN +1: + pushq %rax + pushq %rcx + pushq %rdx + mov $MSR_X2APIC_EOI,%ecx + mov $0,%eax + mov $0,%edx + wrmsr + popq %rdx + popq %rcx + popq %rax + + testb $SEL_RPL,16(%rsp) + jz 2f + swapgs + FENCE_SWAPGS_MIS_TAKEN +2: + retq + lfence +END(x2apic_eoi_swapgs) + #if NLAPIC > 0 #ifdef MULTIPROCESSOR KIDTVEC(recurse_lapic_ipi) @@ -629,9 +663,9 @@ END(Xresume_lapic_ipi) */ /* invalidate the entire TLB, no PCIDs version */ IDTVEC(ipi_invltlb) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax movq %cr3, %rax movq %rax, %cr3 @@ -646,11 +680,11 @@ END(Xipi_invltlb) #if NVMM > 0 /* Invalidate VMX EPT */ IDTVEC(ipi_invept) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx - ioapic_asm_ack() - movq $ept_shoot_vid, %rax movq ept_shoot_mode, %rdx invept (%rax), %rdx @@ -666,9 +700,9 @@ END(Xipi_invept) /* invalidate a single page, no PCIDs version */ IDTVEC(ipi_invlpg) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax movq tlb_shoot_addr1, %rax invlpg (%rax) @@ -682,11 +716,11 @@ END(Xipi_invlpg) /* invalidate a range of pages, no PCIDs version */ IDTVEC(ipi_invlrange) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx - ioapic_asm_ack() - movq tlb_shoot_addr1, %rax movq tlb_shoot_addr2, %rdx 1: invlpg (%rax) @@ -706,9 +740,9 @@ END(Xipi_invlrange) * Invalidate the userspace PCIDs. */ IDTVEC(ipi_invltlb_pcid) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax /* set the type */ movl $INVPCID_PCID,%eax @@ -740,9 +774,9 @@ END(Xipi_invltlb_pcid) * while userspace VAs are present in PCIDs 1 and 2. */ IDTVEC(ipi_invlpg_pcid) - pushq %rax + ioapic_asm_ack_fast_ipi() - ioapic_asm_ack() + pushq %rax /* space for the INVPCID descriptor */ subq $16,%rsp @@ -777,12 +811,12 @@ END(Xipi_invlpg_pcid) * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2. */ IDTVEC(ipi_invlrange_pcid) + ioapic_asm_ack_fast_ipi() + pushq %rax pushq %rdx pushq %rcx - ioapic_asm_ack() - /* space for the INVPCID descriptor */ subq $16,%rsp @@ -817,7 +851,7 @@ IDTVEC(ipi_invlrange_pcid) END(Xipi_invlrange_pcid) IDTVEC(ipi_wbinvd) - ioapic_asm_ack() + ioapic_asm_ack_fast_ipi() wbinvd diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h index 2ccb638a8e8..6b6bfee62e1 100644 --- a/sys/arch/amd64/include/codepatch.h +++ b/sys/arch/amd64/include/codepatch.h @@ -70,6 +70,7 @@ void codepatch_disable(void); #define CPTAG_RETPOLINE_R11 15 #define CPTAG_RETPOLINE_R13 16 #define CPTAG_IBPB_NOP 17 +#define CPTAG_EOI_FAST_IPI 18 /* * stac/clac SMAP instructions have lfence like semantics. 
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 8c71c424a8f..6b725ff796a 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -107,6 +107,7 @@ enum cpu_vendor {
  */
 struct x86_64_tss;
 struct vcpu;
+struct ghcb_sa;
 struct cpu_info {
 	/*
 	 * The beginning of this structure in mapped in the userspace "u-k"
@@ -219,6 +220,9 @@ struct cpu_info {
 	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
 #endif

+	struct ghcb_sa	*ci_ghcb;
+	paddr_t		ci_ghcb_paddr;
+
 	struct ksensordev	ci_sensordev;
 	struct ksensor		ci_sensor;
 	struct ksensor		ci_hz_sensor;
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index fb1de0cb1b1..5b2669a36aa 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -71,6 +71,7 @@ struct cpu_functions {
 };

 extern struct cpu_functions mp_cpu_funcs;
+extern struct cpu_functions mp_sev_es_cpu_funcs;

 #define CPU_ROLE_SP	0
 #define CPU_ROLE_BP	1
diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
index bac63968d24..a39d5a9401f 100644
--- a/sys/arch/amd64/include/ghcb.h
+++ b/sys/arch/amd64/include/ghcb.h
@@ -111,9 +111,6 @@ struct ghcb_sync {

 #ifndef _LOCORE

-extern vaddr_t ghcb_vaddr;
-extern paddr_t ghcb_paddr;
-
 struct ghcb_extra_regs {
 	uint64_t	exitcode;
 	uint64_t	exitinfo1;
@@ -136,6 +133,9 @@ void ghcb_sync_in(struct trapframe *, struct ghcb_extra_regs *,
     struct ghcb_sa *, struct ghcb_sync *);
 void _ghcb_mem_rw(vaddr_t, int, void *, bool);
 void _ghcb_io_rw(uint16_t, int, uint32_t *, bool);
+#ifdef MULTIPROCESSOR
+int ghcb_get_ap_jump_table(paddr_t *);
+#endif

 static inline uint8_t
 ghcb_mem_read_1(vaddr_t addr)
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..3288176fb22 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,21 @@

 #include

-#define ioapic_asm_ack(num) \
+/*
+ * This macro must also work if swapgs has not been called on entry
+ * from userland.
+ */
+#define ioapic_asm_ack_fast_ipi(num) \
+	CODEPATCH_START			;\
+	movl	$0,(local_apic+LAPIC_EOI)(%rip)	;\
+	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * This macro assumes that swapgs has already been called (e.g. by
+ * INTRENTRY).
+ */
+#define ioapic_asm_ack(num) \
 	CODEPATCH_START			;\
 	movl	$0,(local_apic+LAPIC_EOI)(%rip)	;\
 	CODEPATCH_END(CPTAG_EOI)
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 25b1618ad1f..23ee60eb465 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -271,6 +271,7 @@
  */
 #define SEV_VMGEXIT_MMIO_READ	0x80000001
 #define SEV_VMGEXIT_MMIO_WRITE	0x80000002
+#define SEV_VMGEXIT_AP_JUMP_TABLE	0x80000005

 #ifndef _LOCORE
diff --git a/sys/dev/acpi/acpimadt.c b/sys/dev/acpi/acpimadt.c
index 275f2b1e6ce..f9f3a0a6538 100644
--- a/sys/dev/acpi/acpimadt.c
+++ b/sys/dev/acpi/acpimadt.c
@@ -263,6 +263,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
 		caa.cpu_acpi_proc_id = entry->madt_lapic.acpi_proc_id;
 #ifdef MULTIPROCESSOR
 		caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
 #endif
 #ifdef __i386__
 		/*
@@ -318,6 +322,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
 		caa.cpu_acpi_proc_id = entry->madt_x2apic.acpi_proc_uid;
 #ifdef MULTIPROCESSOR
 		caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
 #endif
 #ifdef __i386__
 		/*