SEV-ES multiprocessor support
Hi,
one remaining problem with SEV-ES is that we don't have multiprocessor
support yet, and booting openbsd in a SEV-ES VM that has several VCPUs
hangs at
cpu3 at mainbus0: apid 3 (application processor)
cpu3: failed to become ready
cpu3: failed to identify
Sometimes it continues after some time, but often it does not. I am not
sure if the problem is on our side or if there is some error handling
missing in qemu/KVM. Even when it does not hang, some things do not work
correctly: sysctl hw.ncpu reports a wrong value, top prints warnings,
and so on.
In any case, I think this should be fixed somehow before the release, in
order to avoid support requests on the lists. There are two ways forward:
1) try to get SEV-ES MP support finished before the release.
2) commit some workaround that prevents openbsd from trying to use the
application processors if SEV-ES is enabled. Likely in cpu_match(); a
sketch follows below.
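For 2), a minimal sketch of what I have in mind (untested; it assumes
cpu_match() keeps its current shape and that cpu_sev_guestmode,
SEV_STAT_ES_ENABLED and CPU_ROLE_AP are all visible in cpu.c):

int
cpu_match(struct device *parent, void *match, void *aux)
{
	struct cfdata *cf = match;
	struct cpu_attach_args *caa = aux;

	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
		return 0;

	/*
	 * Workaround: in SEV-ES guest mode, do not match the
	 * application processors, so autoconf never tries to
	 * start them.
	 */
	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) &&
	    caa->cpu_role == CPU_ROLE_AP)
		return 0;

	return 1;
}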
The diff that implements MP support is attached below. With this diff,
openbsd works for me in a 4 VCPU VM with SEV-ES enabled.
There is also the question of whether we actually need MP support for
SEV-ES. SEV-ES is just an intermediate step, and in the end most people
will want to use SEV-SNP (supported on Zen 3 Epyc CPUs and later). MP CPU
bringup is again a bit different with SEV-SNP compared to SEV-ES, though
the larger part of the diff is needed for both variants. In my opinion,
skipping MP support for SEV-ES and only implementing it later for
SEV-SNP is also an option.
I doubt there is enough time for 1). But I could start splitting the diff
into reviewable parts and we will see how far we get.
What do you think?
Cheers,
Stefan
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 2611859f3f5..247f7b8cff1 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -95,6 +95,7 @@
#include <machine/gdt.h>
#include <machine/pio.h>
#include <machine/vmmvar.h>
+#include <machine/ghcb.h>
#if NLAPIC > 0
#include <machine/i82489reg.h>
@@ -438,6 +439,10 @@ int mp_cpu_start(struct cpu_info *);
void mp_cpu_start_cleanup(struct cpu_info *);
struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
mp_cpu_start_cleanup };
+int mp_sev_es_cpu_start(struct cpu_info *);
+void mp_sev_es_cpu_start_cleanup(struct cpu_info *);
+struct cpu_functions mp_sev_es_cpu_funcs = { mp_sev_es_cpu_start, NULL,
+ mp_sev_es_cpu_start_cleanup };
#endif /* MULTIPROCESSOR */
const struct cfattach cpu_ca = {
@@ -606,6 +611,27 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
ci->ci_tlog_base = malloc(sizeof(struct tlog),
M_DEVBUF, M_WAITOK);
#endif
+
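+		/*
+		 * Give each AP its own GHCB page and map it unencrypted
+		 * (PMAP_NOCRYPT), so it can be shared with the hypervisor.
+		 */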
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+ struct ghcb_sa *ghcb_va = NULL;
+ struct vm_page *ghcb_page;
+
+ ghcb_page = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+ if (ghcb_page == NULL)
+ panic("failed to allocate GHCB page");
+
+ ghcb_va = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+ if (ghcb_va == NULL)
+ panic("failed to allocate virtual GHCB address");
+
+ pmap_kenter_pa((vaddr_t)ghcb_va, ghcb_page->phys_addr | PMAP_NOCRYPT,
+ PROT_READ | PROT_WRITE);
+
+ ci->ci_ghcb_paddr = ghcb_page->phys_addr;
+ ci->ci_ghcb = ghcb_va;
+
+ memset(ghcb_va, 0, PAGE_SIZE);
+ }
} else {
ci = &cpu_info_primary;
#if defined(MULTIPROCESSOR)
@@ -1031,6 +1057,24 @@ cpu_hatch(void *v)
struct cpu_info *ci = (struct cpu_info *)v;
int s;
+	/*
+	 * We need the GSBASE MSR for the vctrap handler to work.
+	 * CPUID will trap into the #VC trap handler on AMD SEV-ES.
+	 */
+ cpu_init_msrs(ci);
+
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+ extern int x2apic_enabled;
+
+ /* Load IDT early for #VC handler */
+ cpu_init_idt();
+ if (x2apic_enabled) {
+			/*
+			 * Now that we have a #VC handler, we are able
+			 * to enable x2APIC.
+			 */
+ wrmsr(MSR_APICBASE, rdmsr(MSR_APICBASE) | APICBASE_ENABLE_X2APIC);
+ }
+ }
+
{
uint32_t vendor[4];
int level;
@@ -1040,7 +1084,6 @@ cpu_hatch(void *v)
cpu_set_vendor(ci, level, (const char *)vendor);
}
- cpu_init_msrs(ci);
#ifdef DEBUG
if (ci->ci_flags & CPUF_PRESENT)
@@ -1205,6 +1248,60 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
outb(IO_RTC, NVRAM_RESET);
outb(IO_RTC+1, NVRAM_RESET_RST);
}
+
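+/*
+ * Physical address of the SEV-ES AP jump table, queried from the
+ * hypervisor and programmed only once, on the first AP start.
+ */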
+paddr_t sev_es_jmp_tbl_addr;
+
+int
+mp_sev_es_cpu_start(struct cpu_info *ci)
+{
+ struct {
+ uint16_t reset_ip;
+ uint16_t reset_cs;
+ } *jmp_tbl;
+
+ if (sev_es_jmp_tbl_addr == 0) {
+ paddr_t jmp_tbl_paddr;
+
+ if (!ghcb_get_ap_jump_table(&jmp_tbl_paddr))
+ sev_es_jmp_tbl_addr = jmp_tbl_paddr & ~PAGE_MASK;
+ else
+ panic("failed to get AP jump table address");
+
+ /* Update the AP jump table only once */
+ jmp_tbl = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+ if (jmp_tbl == NULL)
+ panic("failed to allocate virtual address");
+
+ pmap_kenter_pa((vaddr_t)jmp_tbl, sev_es_jmp_tbl_addr,
+ PROT_READ | PROT_WRITE);
+
+ jmp_tbl->reset_ip = 0;
+ jmp_tbl->reset_cs = MP_TRAMPOLINE >> 4;
+
+ pmap_kremove((vaddr_t)jmp_tbl, PAGE_SIZE);
+ km_free(jmp_tbl, PAGE_SIZE, &kv_any, &kp_none);
+ }
+
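+	/*
+	 * Standard INIT/SIPI/SIPI sequence.  With SEV-ES the SIPI
+	 * vector itself is not used; the AP should come up through
+	 * the jump table programmed above, i.e. at MP_TRAMPOLINE.
+	 */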
+ if (ci->ci_flags & CPUF_AP) {
+ x86_ipi_init(ci->ci_apicid);
+
+ delay(10000);
+
+ if (cpu_feature & CPUID_APIC) {
+ x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+ delay(200);
+
+ x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+ delay(200);
+ }
+ }
+
+ return 0;
+}
+
+void
+mp_sev_es_cpu_start_cleanup(struct cpu_info *ci)
+{
+ (void)ci;
+}
#endif /* MULTIPROCESSOR */
typedef void (vector)(void);
diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
index 2b0fa809570..aace7f28303 100644
--- a/sys/arch/amd64/amd64/ghcb.c
+++ b/sys/arch/amd64/amd64/ghcb.c
@@ -47,9 +47,6 @@ const uint64_t ghcb_sz_clear_masks[] = {
0xffffffffffffffffULL, 0xffffffffffffffffULL
};
-vaddr_t ghcb_vaddr;
-paddr_t ghcb_paddr;
-
/*
* ghcb_clear
*
@@ -254,6 +251,11 @@ ghcb_sync_in(struct trapframe *frame, struct ghcb_extra_regs *regs,
frame->tf_rdx |= (ghcb->v_rdx & ghcb_sz_masks[gsin->sz_d]);
}
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO1))
+ regs->exitinfo1 = ghcb->v_sw_exitinfo1;
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO2))
+ regs->exitinfo2 = ghcb->v_sw_exitinfo2;
+
if (regs && regs->data) {
data_sz = regs->data_sz;
KASSERT(data_sz <= sizeof(ghcb->v_sharedbuf));
@@ -303,14 +305,14 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_READ;
ghcb_regs.exitinfo1 = paddr;
ghcb_regs.exitinfo2 = size;
- ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
- v_sharedbuf);
+ ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+ offsetof(struct ghcb_sa, v_sharedbuf);
} else {
ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_WRITE;
ghcb_regs.exitinfo1 = paddr;
ghcb_regs.exitinfo2 = size;
- ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
- v_sharedbuf);
+ ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+ offsetof(struct ghcb_sa, v_sharedbuf);
ghcb_regs.data = val;
ghcb_regs.data_sz = size;
}
@@ -322,10 +324,10 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
s = intr_disable();
- ghcb = (struct ghcb_sa *)ghcb_vaddr;
- ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+ wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
- wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+ ghcb = curcpu()->ci_ghcb;
+ ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
vmgexit();
@@ -399,10 +401,10 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
s = intr_disable();
- ghcb = (struct ghcb_sa *)ghcb_vaddr;
+ ghcb = curcpu()->ci_ghcb;
ghcb_sync_out(&frame, &ghcb_regs, ghcb, &syncout);
- wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+ wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
vmgexit();
@@ -418,3 +420,55 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
if (read)
*val = frame.tf_rax;
}
+
+#ifdef MULTIPROCESSOR
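+/*
+ * Ask the hypervisor for the AP jump table address using the
+ * SEV_VMGEXIT_AP_JUMP_TABLE GHCB request (exitinfo1 == 1 means GET).
+ * Returns 0 on success and stores the address in *jmp_tbl_addr.
+ */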
+int
+ghcb_get_ap_jump_table(paddr_t *jmp_tbl_addr)
+{
+ struct ghcb_sa *ghcb;
+ struct ghcb_sync syncout, syncin;
+ struct ghcb_extra_regs ghcb_regs;
+ unsigned long s;
+
+ memset(&syncout, 0, sizeof(syncout));
+ memset(&syncin, 0, sizeof(syncin));
+ memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+ ghcb_regs.exitcode = SEV_VMGEXIT_AP_JUMP_TABLE;
+ ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
+	ghcb_regs.exitinfo1 = 1;	/* GET */
+ ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
+ ghcb_regs.exitinfo2 = 0;
+ ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
+
+ ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncin);
+ ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncin);
+
+ s = intr_disable();
+
+ wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
+
+ ghcb = curcpu()->ci_ghcb;
+ ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+
+ vmgexit();
+
+ if (ghcb_verify_bm(ghcb->valid_bitmap, syncin.valid_bitmap)) {
+ ghcb_clear(ghcb);
+ panic("invalid hypervisor response");
+ }
+
+ memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+ ghcb_sync_in(NULL, &ghcb_regs, ghcb, &syncin);
+
+ intr_restore(s);
+
+ if (ghcb_regs.exitinfo1 == 0) {
+ *jmp_tbl_addr = ghcb_regs.exitinfo2;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index f7fdb81ccca..80436294e6f 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
};
extern int x2apic_eoi;
+extern int x2apic_eoi_swapgs;
int x2apic_enabled = 0;
u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
#endif
x2apic_enabled = 1;
codepatch_call(CPTAG_EOI, &x2apic_eoi);
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
+ else
+ codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
va = (vaddr_t)&local_apic;
} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
pte = kvtopte(va);
*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
invlpg(va);
+
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ panic("xAPIC mode not implemented for SEV-ES");
}
/*
diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
index 4533b19df2f..951da60b1d2 100644
--- a/sys/arch/amd64/amd64/locore0.S
+++ b/sys/arch/amd64/amd64/locore0.S
@@ -804,15 +804,6 @@ longmode_hi:
addq %rsi,%rdx
movq %rdx,atdevbase(%rip)
- /* Relocate GHCB. */
- movq cpu_sev_guestmode(%rip),%rax
- testq $SEV_STAT_ES_ENABLED,%rax
- jz .Lnoghcbreloc
- movq $(PROC0_GHCB_OFF+KERNBASE),%rdx
- addq %rsi,%rdx
- movq %rdx,ghcb_vaddr(%rip)
-
-.Lnoghcbreloc:
/* Record start of symbols */
movq $__kernel_bss_end, ssym(%rip)
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 3de32b26354..d86ec85db67 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -1342,9 +1342,10 @@ cpu_init_early_vctrap(paddr_t addr)
cpu_init_idt();
/* Tell vmm(4) about our GHCB. */
- ghcb_paddr = addr;
- memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
- wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+ cpu_info_primary.ci_ghcb_paddr = addr;
+ cpu_info_primary.ci_ghcb = (struct ghcb_sa *)(addr + KERNBASE);
+ memset(cpu_info_primary.ci_ghcb, 0, 2 * PAGE_SIZE);
+ wrmsr(MSR_SEV_GHCB, cpu_info_primary.ci_ghcb_paddr);
}
void
@@ -1388,6 +1389,7 @@ map_tramps(void)
extern u_char mp_tramp_data_start[];
extern u_char mp_tramp_data_end[];
extern u_int32_t mp_pdirpa;
+ extern u_int32_t mp_sev_guestmode;
#endif
/*
@@ -1429,6 +1431,13 @@ map_tramps(void)
*/
mp_pdirpa = tramp_pdirpa;
+ /*
+ * We need to introduce and set mp_sev_guestmode since the
+	 * global cpu_sev_guestmode variable may not be accessible in
+	 * 16- or 32-bit mode.
+ */
+ mp_sev_guestmode = cpu_sev_guestmode;
+
/* Unmap, will be remapped in cpu_start_secondary */
pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S
index 96247c8e890..838168843bf 100644
--- a/sys/arch/amd64/amd64/mptramp.S
+++ b/sys/arch/amd64/amd64/mptramp.S
@@ -143,6 +143,14 @@ _TRMP_LABEL(.Lmp_startup)
rdmsr
movl %edx, %edi # %edx is needed by wrmsr below
+	# If SEV is enabled, we can assume that NXE is supported and we
+	# cannot do cpuid yet.
+ movl $mp_sev_guestmode, %edx
+ movl (%edx), %edx
+ xorl %eax, %eax
+ testl %edx, %edx
+ jnz 4f
+
# Check if we need to enable NXE
movl $0x80000001, %eax
cpuid
@@ -150,6 +158,7 @@ _TRMP_LABEL(.Lmp_startup)
xorl %eax,%eax
testl %edx, %edx
jz 1f
+4:
orl $EFER_NXE, %eax
1:
orl $(EFER_LME|EFER_SCE), %eax
@@ -192,6 +201,31 @@ END(cpu_spinup_trampoline)
.text
GENTRY(cpu_spinup_finish)
+ movl $mp_sev_guestmode, %eax
+ movl (%eax), %eax
+ testl $SEV_STAT_ES_ENABLED, %eax
+ jz 5f
+
+ # We are in SEV-ES mode. MSR or MMIO access is only possible
+ # through a GHCB. Query APIC ID via CPUID leaf 1 EBX
+ movl $1, %edx
+	# Register selector 1 == EBX, request code 4 == CPUID request.
+ movl $(1 << 30 | 4), %eax
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+ rdmsr
+ # Make sure the query was successful
+ cmpl $(1 << 30 | 5), %eax
+ jne .Lsev_es_terminate
+
+ movl %edx, %eax
+ shrl $24, %eax
+ # Skip x2apic initialization if running on SEV-ES or higher.
+ # We cannot do rdmsr/wrmsr without a GHCB. Will be done later in cpu_hatch.
+ jmp 2f
+
+5:
movl x2apic_enabled,%eax
testl %eax,%eax
jz 1f
@@ -234,9 +268,18 @@ GENTRY(cpu_spinup_finish)
movq %rax,%cr0
call cpu_hatch
movq $0,-8(%rsp)
-END(cpu_spinup_finish)
/* NOTREACHED */
+.Lsev_es_terminate:
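+	# Request termination through the GHCB MSR protocol
+	# (GHCBInfo 0x100 == termination request).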
+ xorl %edx, %edx
+ movl $0x100, %eax
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+ hlt
+ jmp .Lsev_es_terminate
+END(cpu_spinup_finish)
+
.section .rodata
.type mp_tramp_data_start,@object
mp_tramp_data_start:
@@ -250,6 +293,12 @@ _TRMP_DATA_LABEL(mp_pdirpa)
.long 0
.size mp_pdirpa,4
+ .global mp_sev_guestmode
+ .type mp_sev_guestmode,@object
+_TRMP_DATA_LABEL(mp_sev_guestmode)
+ .long 0
+ .size mp_sev_guestmode,4
+
_TRMP_DATA_LABEL(.Lmptramp_gdt32)
.quad 0x0000000000000000
diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 396366de89b..49c7c0ffb70 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -426,6 +426,17 @@ vctrap(struct trapframe *frame, int user)
}
break;
}
+	case SVM_VMEXIT_WBINVD:
+		/*
+		 * There is no special GHCB request for WBNOINVD, so
+		 * signal WBINVD to emulate WBNOINVD.  WBNOINVD
+		 * (0xf3 0x0f 0x09) is three bytes long, WBINVD
+		 * (0x0f 0x09) only two.
+		 */
+		if (*rip == 0xf3)
+			frame->tf_rip += 3;
+		else
+			frame->tf_rip += 2;
+		break;
+ case SVM_VMEXIT_NPF:
+ panic("Unexptected SEV nested page fault");
default:
panic("invalid exit code 0x%llx", ghcb_regs.exitcode);
}
@@ -436,10 +447,10 @@ vctrap(struct trapframe *frame, int user)
ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
/* Sync out to GHCB */
- ghcb = (struct ghcb_sa *)ghcb_vaddr;
+ ghcb = curcpu()->ci_ghcb;
ghcb_sync_out(frame, &ghcb_regs, ghcb, &syncout);
- wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+ wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
/* Call hypervisor. */
vmgexit();
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 8b82db6b4f6..cbfe817ea9c 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -590,6 +590,40 @@ KUENTRY(x2apic_eoi)
lfence
END(x2apic_eoi)
+/*
+ * With SEV-ES the wrmsr instruction traps into the #VC handler which
+ * needs the kernel GS_BASE. So if we come from userland, we need to
+ * do swapgs. The fast IPI handler does not perform swapgs, so we need
+ * to do it here. In order to detect whether we come from user or kernel
+ * land, this function MUST be called before %rsp is modified.
+ */
+KUENTRY(x2apic_eoi_swapgs)
+	/* If we come from userland, swapgs so that curcpu() works */
+ testb $SEL_RPL,16(%rsp)
+ jz 1f
+ swapgs
+ FENCE_SWAPGS_MIS_TAKEN
+1:
+ pushq %rax
+ pushq %rcx
+ pushq %rdx
+ mov $MSR_X2APIC_EOI,%ecx
+ mov $0,%eax
+ mov $0,%edx
+ wrmsr
+ popq %rdx
+ popq %rcx
+ popq %rax
+
+ testb $SEL_RPL,16(%rsp)
+ jz 2f
+ swapgs
+ FENCE_SWAPGS_MIS_TAKEN
+2:
+ retq
+ lfence
+END(x2apic_eoi_swapgs)
+
#if NLAPIC > 0
#ifdef MULTIPROCESSOR
KIDTVEC(recurse_lapic_ipi)
@@ -629,9 +663,9 @@ END(Xresume_lapic_ipi)
*/
/* invalidate the entire TLB, no PCIDs version */
IDTVEC(ipi_invltlb)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
movq %cr3, %rax
movq %rax, %cr3
@@ -646,11 +680,11 @@ END(Xipi_invltlb)
#if NVMM > 0
/* Invalidate VMX EPT */
IDTVEC(ipi_invept)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
- ioapic_asm_ack()
-
movq $ept_shoot_vid, %rax
movq ept_shoot_mode, %rdx
invept (%rax), %rdx
@@ -666,9 +700,9 @@ END(Xipi_invept)
/* invalidate a single page, no PCIDs version */
IDTVEC(ipi_invlpg)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
movq tlb_shoot_addr1, %rax
invlpg (%rax)
@@ -682,11 +716,11 @@ END(Xipi_invlpg)
/* invalidate a range of pages, no PCIDs version */
IDTVEC(ipi_invlrange)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
- ioapic_asm_ack()
-
movq tlb_shoot_addr1, %rax
movq tlb_shoot_addr2, %rdx
1: invlpg (%rax)
@@ -706,9 +740,9 @@ END(Xipi_invlrange)
* Invalidate the userspace PCIDs.
*/
IDTVEC(ipi_invltlb_pcid)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
/* set the type */
movl $INVPCID_PCID,%eax
@@ -740,9 +774,9 @@ END(Xipi_invltlb_pcid)
* while userspace VAs are present in PCIDs 1 and 2.
*/
IDTVEC(ipi_invlpg_pcid)
- pushq %rax
+ ioapic_asm_ack_fast_ipi()
- ioapic_asm_ack()
+ pushq %rax
/* space for the INVPCID descriptor */
subq $16,%rsp
@@ -777,12 +811,12 @@ END(Xipi_invlpg_pcid)
* PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
*/
IDTVEC(ipi_invlrange_pcid)
+ ioapic_asm_ack_fast_ipi()
+
pushq %rax
pushq %rdx
pushq %rcx
- ioapic_asm_ack()
-
/* space for the INVPCID descriptor */
subq $16,%rsp
@@ -817,7 +851,7 @@ IDTVEC(ipi_invlrange_pcid)
END(Xipi_invlrange_pcid)
IDTVEC(ipi_wbinvd)
- ioapic_asm_ack()
+ ioapic_asm_ack_fast_ipi()
wbinvd
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 2ccb638a8e8..6b6bfee62e1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -70,6 +70,7 @@ void codepatch_disable(void);
#define CPTAG_RETPOLINE_R11 15
#define CPTAG_RETPOLINE_R13 16
#define CPTAG_IBPB_NOP 17
+#define CPTAG_EOI_FAST_IPI 18
/*
* stac/clac SMAP instructions have lfence like semantics. Let's
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 8c71c424a8f..6b725ff796a 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -107,6 +107,7 @@ enum cpu_vendor {
*/
struct x86_64_tss;
struct vcpu;
+struct ghcb_sa;
struct cpu_info {
/*
* The beginning of this structure in mapped in the userspace "u-k"
@@ -219,6 +220,9 @@ struct cpu_info {
struct uvm_pmr_cache ci_uvm; /* [o] page cache */
#endif
+ struct ghcb_sa *ci_ghcb;
+ paddr_t ci_ghcb_paddr;
+
struct ksensordev ci_sensordev;
struct ksensor ci_sensor;
struct ksensor ci_hz_sensor;
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index fb1de0cb1b1..5b2669a36aa 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -71,6 +71,7 @@ struct cpu_functions {
};
extern struct cpu_functions mp_cpu_funcs;
+extern struct cpu_functions mp_sev_es_cpu_funcs;
#define CPU_ROLE_SP 0
#define CPU_ROLE_BP 1
diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
index bac63968d24..a39d5a9401f 100644
--- a/sys/arch/amd64/include/ghcb.h
+++ b/sys/arch/amd64/include/ghcb.h
@@ -111,9 +111,6 @@ struct ghcb_sync {
#ifndef _LOCORE
-extern vaddr_t ghcb_vaddr;
-extern paddr_t ghcb_paddr;
-
struct ghcb_extra_regs {
uint64_t exitcode;
uint64_t exitinfo1;
@@ -136,6 +133,9 @@ void ghcb_sync_in(struct trapframe *, struct ghcb_extra_regs *,
struct ghcb_sa *, struct ghcb_sync *);
void _ghcb_mem_rw(vaddr_t, int, void *, bool);
void _ghcb_io_rw(uint16_t, int, uint32_t *, bool);
+#ifdef MULTIPROCESSOR
+int ghcb_get_ap_jump_table(paddr_t *);
+#endif
static inline uint8_t
ghcb_mem_read_1(vaddr_t addr)
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..3288176fb22 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,21 @@
#include <machine/codepatch.h>
-#define ioapic_asm_ack(num) \
+/*
+ * This macro must also work if swapgs has not been called on entry
+ * from userland.
+ */
+#define ioapic_asm_ack_fast_ipi(num) \
+ CODEPATCH_START ;\
+ movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
+ CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * This macro assumes that swapgs has already been called (e.g. by
+ * INTRENTRY).
+ */
+#define ioapic_asm_ack(num) \
CODEPATCH_START ;\
movl $0,(local_apic+LAPIC_EOI)(%rip) ;\
CODEPATCH_END(CPTAG_EOI)
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 25b1618ad1f..23ee60eb465 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -271,6 +271,7 @@
*/
#define SEV_VMGEXIT_MMIO_READ 0x80000001
#define SEV_VMGEXIT_MMIO_WRITE 0x80000002
+#define SEV_VMGEXIT_AP_JUMP_TABLE 0x80000005
#ifndef _LOCORE
diff --git a/sys/dev/acpi/acpimadt.c b/sys/dev/acpi/acpimadt.c
index 275f2b1e6ce..f9f3a0a6538 100644
--- a/sys/dev/acpi/acpimadt.c
+++ b/sys/dev/acpi/acpimadt.c
@@ -263,6 +263,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
caa.cpu_acpi_proc_id = entry->madt_lapic.acpi_proc_id;
#ifdef MULTIPROCESSOR
caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
#endif
#ifdef __i386__
/*
@@ -318,6 +322,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
caa.cpu_acpi_proc_id = entry->madt_x2apic.acpi_proc_uid;
#ifdef MULTIPROCESSOR
caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
#endif
#ifdef __i386__
/*