From: Stefan Fritsch <sf@sfritsch.de>
Subject: SEV-ES multiprocessor support
To: tech@openbsd.org
Cc: Mike Larkin <mlarkin@nested.page>
Date: Thu, 18 Sep 2025 13:17:32 +0200

Hi,

One remaining problem with SEV-ES is that we don't support multiprocessor
operation yet: booting OpenBSD in an SEV-ES VM that has several VCPUs
hangs at

cpu3 at mainbus0: apid 3 (application processor)
cpu3: failed to become ready
cpu3: failed to identify

Sometimes the boot continues after a while, but often it does not. I am
not sure whether the problem is on our side or whether some error
handling is missing in qemu/KVM. Even when it does not hang, some things
do not work correctly: sysctl hw.ncpu reports the wrong value, top
prints warnings, and so on.

In any case, I think this should be fixed somehow before the release, in 
order to avoid support requests on the lists. There are two ways forward:

1) try to get SEV-ES MP support finished before the release.

2) commit some workaround that prevents OpenBSD from trying to use the
application processors if SEV-ES is enabled, likely in cpu_match(); a
rough sketch follows below.
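
For illustration, here is a minimal, untested sketch of 2). The existing
cpu_match() body is reproduced from memory and may differ; the SEV-ES
check is the new part:

int
cpu_match(struct device *parent, void *match, void *aux)
{
	struct cfdata *cf = match;
	struct cpu_attach_args *caa = aux;

	/*
	 * Hypothetical workaround: in an SEV-ES guest we cannot
	 * bring up the application processors yet, so refuse to
	 * match anything but the boot processor.
	 */
	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) &&
	    caa->cpu_role == CPU_ROLE_AP)
		return (0);

	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) == 0)
		return (1);
	return (0);
}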

The diff that implements MP support is attached below. With this diff,
OpenBSD works for me in a 4-VCPU VM with SEV-ES enabled.
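
One note for reviewers: until an AP has a GHCB page mapped, the
trampoline can only talk to the hypervisor through the GHCB MSR
protocol (see cpu_spinup_finish in the mptramp.S hunk below). Here is a
rough C illustration of the encoding used there (the 0x004/0x005
function numbers follow the GHCB spec; the helper names are invented):

#include <stdint.h>

#define GHCB_MSR_CPUID_REQ	0x004ULL	/* CPUID request */
#define GHCB_MSR_CPUID_RESP	0x005ULL	/* CPUID response */

/* Value to write to MSR_SEV_GHCB before VMGEXIT ("rep vmmcall"). */
static inline uint64_t
ghcb_msr_cpuid_req(uint32_t leaf, int reg)	/* 0=EAX ... 3=EDX */
{
	return ((uint64_t)leaf << 32) | ((uint64_t)reg << 30) |
	    GHCB_MSR_CPUID_REQ;
}

/* On success, the requested register value is in MSR bits 63:32. */
static inline int
ghcb_msr_cpuid_resp(uint64_t msr, uint32_t *val)
{
	if ((msr & 0xfff) != GHCB_MSR_CPUID_RESP)
		return (1);	/* unexpected response */
	*val = (uint32_t)(msr >> 32);
	return (0);
}

The APIC ID is CPUID leaf 1, EBX bits 31:24, which is what the
shrl $24 in the trampoline extracts; on failure, the AP issues a
termination request (function 0x100) instead.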

There is also the question whether we actually need MP support for
SEV-ES. SEV-ES is just an intermediate step and in the end, most people
will want to use SEV-SNP (supported in Zen 3 EPYC CPUs and later). MP
CPU bringup is again a bit different with SEV-SNP compared to SEV-ES,
though the larger part of the diff is needed for both variants. In my
opinion, skipping MP support for SEV-ES and only implementing it for
SEV-SNP later is also an option.

I doubt there is enough time for 1), but I could start splitting the
diff into reviewable parts and we will see how far we get.

What do you think?

Cheers,
Stefan


diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 2611859f3f5..247f7b8cff1 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -95,6 +95,7 @@
 #include <machine/gdt.h>
 #include <machine/pio.h>
 #include <machine/vmmvar.h>
+#include <machine/ghcb.h>
 
 #if NLAPIC > 0
 #include <machine/i82489reg.h>
@@ -438,6 +439,10 @@ int mp_cpu_start(struct cpu_info *);
 void mp_cpu_start_cleanup(struct cpu_info *);
 struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
 				      mp_cpu_start_cleanup };
+int mp_sev_es_cpu_start(struct cpu_info *);
+void mp_sev_es_cpu_start_cleanup(struct cpu_info *);
+struct cpu_functions mp_sev_es_cpu_funcs = { mp_sev_es_cpu_start, NULL,
+					     mp_sev_es_cpu_start_cleanup };
 #endif /* MULTIPROCESSOR */
 
 const struct cfattach cpu_ca = {
@@ -606,6 +611,27 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
 		ci->ci_tlog_base = malloc(sizeof(struct tlog),
 		    M_DEVBUF, M_WAITOK);
 #endif
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+			struct ghcb_sa *ghcb_va = NULL;
+			struct vm_page *ghcb_page;
+
+			ghcb_page = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
+			if (ghcb_page == NULL)
+				panic("failed to allocate GHCB page");
+
+			ghcb_va = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+			if (ghcb_va == NULL)
+				panic("failed to allocate virtual GHCB address");
+
+			pmap_kenter_pa((vaddr_t)ghcb_va, ghcb_page->phys_addr | PMAP_NOCRYPT,
+				PROT_READ | PROT_WRITE);
+
+			ci->ci_ghcb_paddr = ghcb_page->phys_addr;
+			ci->ci_ghcb = ghcb_va;
+
+			memset(ghcb_va, 0, PAGE_SIZE);
+		}
 	} else {
 		ci = &cpu_info_primary;
 #if defined(MULTIPROCESSOR)
@@ -1031,6 +1057,24 @@ cpu_hatch(void *v)
 	struct cpu_info *ci = (struct cpu_info *)v;
 	int s;
 
+	/* We need the GSBASE MSR for the vctrap handler to work.
+	 * CPUID will trap into the #VC trap handler on AMD SEV-ES.
+	 */
+	cpu_init_msrs(ci);
+
+	if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) {
+		extern int x2apic_enabled;
+
+		/* Load IDT early for #VC handler */
+		cpu_init_idt();
+		if (x2apic_enabled) {
+			/* Now that we have a #VC handler, we are able
+			 * to enable x2APIC.
+			 */
+			wrmsr(MSR_APICBASE, rdmsr(MSR_APICBASE) | APICBASE_ENABLE_X2APIC);
+		}
+	}
+
 	{
 		uint32_t vendor[4];
 		int level;
@@ -1040,7 +1084,6 @@ cpu_hatch(void *v)
 		cpu_set_vendor(ci, level, (const char *)vendor);
 	}
 
-	cpu_init_msrs(ci);
 
 #ifdef DEBUG
 	if (ci->ci_flags & CPUF_PRESENT)
@@ -1205,6 +1248,60 @@ mp_cpu_start_cleanup(struct cpu_info *ci)
 	outb(IO_RTC, NVRAM_RESET);
 	outb(IO_RTC+1, NVRAM_RESET_RST);
 }
+
+paddr_t sev_es_jmp_tbl_addr;
+
+int mp_sev_es_cpu_start(struct cpu_info *ci)
+{
+	struct {
+		uint16_t	reset_ip;
+		uint16_t	reset_cs;
+	} *jmp_tbl;
+
+	if (sev_es_jmp_tbl_addr == 0) {
+		paddr_t jmp_tbl_paddr;
+
+		if (!ghcb_get_ap_jump_table(&jmp_tbl_paddr))
+			sev_es_jmp_tbl_addr = jmp_tbl_paddr & ~PAGE_MASK;
+		else
+			panic("failed to get AP jump table address");
+
+		/* Update the AP jump table only once */
+		jmp_tbl = km_alloc(PAGE_SIZE, &kv_any, &kp_none, &kd_nowait);
+		if (jmp_tbl == NULL)
+			panic("failed to allocate virtual address");
+
+		pmap_kenter_pa((vaddr_t)jmp_tbl, sev_es_jmp_tbl_addr,
+		    PROT_READ | PROT_WRITE);
+
+		jmp_tbl->reset_ip = 0;
+		jmp_tbl->reset_cs = MP_TRAMPOLINE >> 4;
+
+		pmap_kremove((vaddr_t)jmp_tbl, PAGE_SIZE);
+		km_free(jmp_tbl, PAGE_SIZE, &kv_any, &kp_none);
+	}
+
+	if (ci->ci_flags & CPUF_AP) {
+		x86_ipi_init(ci->ci_apicid);
+
+		delay(10000);
+
+		if (cpu_feature & CPUID_APIC) {
+			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+			delay(200);
+
+			x86_ipi(0, ci->ci_apicid, LAPIC_DLMODE_STARTUP);
+			delay(200);
+		}
+	}
+
+	return 0;
+}
+
+void mp_sev_es_cpu_start_cleanup(struct cpu_info *ci)
+{
+	(void)ci;
+}
 #endif	/* MULTIPROCESSOR */
 
 typedef void (vector)(void);
diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
index 2b0fa809570..aace7f28303 100644
--- a/sys/arch/amd64/amd64/ghcb.c
+++ b/sys/arch/amd64/amd64/ghcb.c
@@ -47,9 +47,6 @@ const uint64_t ghcb_sz_clear_masks[] = {
     0xffffffffffffffffULL, 0xffffffffffffffffULL
 };
 
-vaddr_t ghcb_vaddr;
-paddr_t ghcb_paddr;
-
 /*
  * ghcb_clear
  *
@@ -254,6 +251,11 @@ ghcb_sync_in(struct trapframe *frame, struct ghcb_extra_regs *regs,
 		frame->tf_rdx |= (ghcb->v_rdx & ghcb_sz_masks[gsin->sz_d]);
 	}
 
+	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO1))
+		regs->exitinfo1 = ghcb->v_sw_exitinfo1;
+	if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_SW_EXITINFO2))
+		regs->exitinfo2 = ghcb->v_sw_exitinfo2;
+
 	if (regs && regs->data) {
 		data_sz = regs->data_sz;
 		KASSERT(data_sz <= sizeof(ghcb->v_sharedbuf));
@@ -303,14 +305,14 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
 		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_READ;
 		ghcb_regs.exitinfo1 = paddr;
 		ghcb_regs.exitinfo2 = size;
-		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
-		    v_sharedbuf);
+		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+		    offsetof(struct ghcb_sa, v_sharedbuf);
 	} else {
 		ghcb_regs.exitcode = SEV_VMGEXIT_MMIO_WRITE;
 		ghcb_regs.exitinfo1 = paddr;
 		ghcb_regs.exitinfo2 = size;
-		ghcb_regs.scratch = ghcb_paddr + offsetof(struct ghcb_sa,
-		    v_sharedbuf);
+		ghcb_regs.scratch = curcpu()->ci_ghcb_paddr +
+		    offsetof(struct ghcb_sa, v_sharedbuf);
 		ghcb_regs.data = val;
 		ghcb_regs.data_sz = size;
 	}
@@ -322,10 +324,10 @@ _ghcb_mem_rw(vaddr_t addr, int valsz, void *val, bool read)
 
 	s = intr_disable();
 
-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
-	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
 
-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	ghcb = curcpu()->ci_ghcb;
+	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
 
 	vmgexit();
 
@@ -399,10 +401,10 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
 
 	s = intr_disable();
 
-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
+	ghcb = curcpu()->ci_ghcb;
 	ghcb_sync_out(&frame, &ghcb_regs, ghcb, &syncout);
 
-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
 
 	vmgexit();
 
@@ -418,3 +420,55 @@ _ghcb_io_rw(uint16_t port, int valsz, uint32_t *val, bool read)
 	if (read)
 		*val = frame.tf_rax;
 }
+
+#ifdef MULTIPROCESSOR
+int
+ghcb_get_ap_jump_table(paddr_t *jmp_tbl_addr)
+{
+	struct ghcb_sa *ghcb;
+	struct ghcb_sync syncout, syncin;
+	struct ghcb_extra_regs ghcb_regs;
+	unsigned long s;
+
+	memset(&syncout, 0, sizeof(syncout));
+	memset(&syncin, 0, sizeof(syncin));
+	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+	ghcb_regs.exitcode = SEV_VMGEXIT_AP_JUMP_TABLE;
+	ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
+	ghcb_regs.exitinfo1 = 1;	/* 1 == GET */
+	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
+	ghcb_regs.exitinfo2 = 0;
+	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
+
+	ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncin);
+	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncin);
+
+	s = intr_disable();
+
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
+
+	ghcb = curcpu()->ci_ghcb;
+	ghcb_sync_out(NULL, &ghcb_regs, ghcb, &syncout);
+
+	vmgexit();
+
+	if (ghcb_verify_bm(ghcb->valid_bitmap, syncin.valid_bitmap)) {
+		ghcb_clear(ghcb);
+		panic("invalid hypervisor response");
+	}
+
+	memset(&ghcb_regs, 0, sizeof(ghcb_regs));
+
+	ghcb_sync_in(NULL, &ghcb_regs, ghcb, &syncin);
+
+	intr_restore(s);
+
+	if (ghcb_regs.exitinfo1 == 0) {
+		*jmp_tbl_addr = ghcb_regs.exitinfo2;
+		return 0;
+	} else {
+		return 1;
+	}
+}
+#endif
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index f7fdb81ccca..80436294e6f 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -99,6 +99,7 @@ struct pic local_pic = {
 };
 
 extern int x2apic_eoi;
+extern int x2apic_eoi_swapgs;
 int x2apic_enabled = 0;
 
 u_int32_t x2apic_readreg(int reg);
@@ -207,6 +208,10 @@ lapic_map(paddr_t lapic_base)
 #endif
 		x2apic_enabled = 1;
 		codepatch_call(CPTAG_EOI, &x2apic_eoi);
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi_swapgs);
+		else
+			codepatch_call(CPTAG_EOI_FAST_IPI, &x2apic_eoi);
 
 		va = (vaddr_t)&local_apic;
 	} else {
@@ -222,6 +227,9 @@ lapic_map(paddr_t lapic_base)
 		pte = kvtopte(va);
 		*pte = lapic_base | PG_RW | PG_V | PG_N | PG_G | pg_nx;
 		invlpg(va);
+
+		if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+			panic("xAPIC mode not implemented for SEV-ES");
 	}
 
 	/*
diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
index 4533b19df2f..951da60b1d2 100644
--- a/sys/arch/amd64/amd64/locore0.S
+++ b/sys/arch/amd64/amd64/locore0.S
@@ -804,15 +804,6 @@ longmode_hi:
 	addq	%rsi,%rdx
 	movq	%rdx,atdevbase(%rip)
 
-	/* Relocate GHCB. */
-	movq	cpu_sev_guestmode(%rip),%rax
-	testq	$SEV_STAT_ES_ENABLED,%rax
-	jz	.Lnoghcbreloc
-	movq	$(PROC0_GHCB_OFF+KERNBASE),%rdx
-	addq	%rsi,%rdx
-	movq	%rdx,ghcb_vaddr(%rip)
-
-.Lnoghcbreloc:
 	/* Record start of symbols */
 	movq	$__kernel_bss_end, ssym(%rip)
 
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 3de32b26354..d86ec85db67 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -1342,9 +1342,10 @@ cpu_init_early_vctrap(paddr_t addr)
 	cpu_init_idt();
 
 	/* Tell vmm(4) about our GHCB. */
-	ghcb_paddr = addr;
-	memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	cpu_info_primary.ci_ghcb_paddr = addr;
+	cpu_info_primary.ci_ghcb = (struct ghcb_sa *)(addr + KERNBASE);
+	memset(cpu_info_primary.ci_ghcb, 0, 2 * PAGE_SIZE);
+	wrmsr(MSR_SEV_GHCB, cpu_info_primary.ci_ghcb_paddr);
 }
 
 void
@@ -1388,6 +1389,7 @@ map_tramps(void)
 	extern u_char mp_tramp_data_start[];
 	extern u_char mp_tramp_data_end[];
 	extern u_int32_t mp_pdirpa;
+	extern u_int32_t mp_sev_guestmode;
 #endif
 
 	/*
@@ -1429,6 +1431,13 @@ map_tramps(void)
 	 */
 	mp_pdirpa = tramp_pdirpa;
 
+	/*
+	 * We need to introduce and set mp_sev_guestmode since the
+	 * global cpu_sev_guestmode variable may not be accessible in
+	 * 16- or 32-bit mode.
+	 */
+	mp_sev_guestmode = cpu_sev_guestmode;
+
 	/* Unmap, will be remapped in cpu_start_secondary */
 	pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
 	pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S
index 96247c8e890..838168843bf 100644
--- a/sys/arch/amd64/amd64/mptramp.S
+++ b/sys/arch/amd64/amd64/mptramp.S
@@ -143,6 +143,14 @@ _TRMP_LABEL(.Lmp_startup)
 	rdmsr
 	movl	%edx, %edi		# %edx is needed by wrmsr below
 
+	# If SEV is enabled, we can assume that NXE is supported and we
+	# cannot do cpuid yet.
+	movl	$mp_sev_guestmode, %edx
+	movl	(%edx), %edx
+	xorl	%eax, %eax
+	testl	%edx, %edx
+	jnz	4f
+
 	# Check if we need to enable NXE
 	movl	$0x80000001, %eax
 	cpuid
@@ -150,6 +158,7 @@ _TRMP_LABEL(.Lmp_startup)
 	xorl	%eax,%eax
 	testl	%edx, %edx
 	jz	1f
+4:
 	orl	$EFER_NXE, %eax
 1:
 	orl	$(EFER_LME|EFER_SCE), %eax
@@ -192,6 +201,31 @@ END(cpu_spinup_trampoline)
 
 	.text
 GENTRY(cpu_spinup_finish)
+	movl	$mp_sev_guestmode, %eax
+	movl	(%eax), %eax
+	testl	$SEV_STAT_ES_ENABLED, %eax
+	jz	5f
+
+	# We are in SEV-ES mode. MSR or MMIO access is only possible
+	# through a GHCB. Query the APIC ID via CPUID leaf 1, EBX.
+	movl	$1, %edx
+	# register index 1 (EBX), GHCB MSR protocol function 4 (CPUID request)
+	movl	$(1 << 30 | 4), %eax
+	movl	$MSR_SEV_GHCB, %ecx
+	wrmsr
+	rep vmmcall
+	rdmsr
+	# Make sure the query was successful
+	cmpl	$(1 << 30 | 5), %eax
+	jne	.Lsev_es_terminate
+
+	movl	%edx, %eax
+	shrl	$24, %eax
+	# Skip x2apic initialization if running on SEV-ES or higher.
+	# We cannot do rdmsr/wrmsr without a GHCB. Will be done later in cpu_hatch.
+	jmp	2f
+
+5:
 	movl	x2apic_enabled,%eax
 	testl	%eax,%eax
 	jz	1f
@@ -234,9 +268,18 @@ GENTRY(cpu_spinup_finish)
 	movq	%rax,%cr0
 	call	cpu_hatch
 	movq	$0,-8(%rsp)
-END(cpu_spinup_finish)
 	/* NOTREACHED */
 
+.Lsev_es_terminate:
+	xorl	%edx, %edx
+	movl	$0x100, %eax
+	movl	$MSR_SEV_GHCB, %ecx
+	wrmsr
+	rep vmmcall
+	hlt
+	jmp	.Lsev_es_terminate
+END(cpu_spinup_finish)
+
 	.section .rodata
 	.type	mp_tramp_data_start,@object
 mp_tramp_data_start:
@@ -250,6 +293,12 @@ _TRMP_DATA_LABEL(mp_pdirpa)
 	.long 0
 	.size	mp_pdirpa,4
 
+	.global mp_sev_guestmode
+	.type	mp_sev_guestmode,@object
+_TRMP_DATA_LABEL(mp_sev_guestmode)
+	.long	0
+	.size	mp_sev_guestmode,4
+
 
 _TRMP_DATA_LABEL(.Lmptramp_gdt32)
 	.quad 0x0000000000000000
diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 396366de89b..49c7c0ffb70 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -426,6 +426,17 @@ vctrap(struct trapframe *frame, int user)
 		}
 		break;
 	    }
+	case SVM_VMEXIT_WBINVD:
+		/* There is no special GHCB request for WBNOINVD.
+		 * Signal WBINVD to emulate WBNOINVD.
+		 */
+		if (*rip == 0xf3)
+			frame->tf_rip += 3;
+		else
+			frame->tf_rip += 2;
+		break;
+	case SVM_VMEXIT_NPF:
+		panic("unexpected SEV nested page fault");
 	default:
 		panic("invalid exit code 0x%llx", ghcb_regs.exitcode);
 	}
@@ -436,10 +447,10 @@ vctrap(struct trapframe *frame, int user)
 	ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
 
 	/* Sync out to GHCB */
-	ghcb = (struct ghcb_sa *)ghcb_vaddr;
+	ghcb = curcpu()->ci_ghcb;
 	ghcb_sync_out(frame, &ghcb_regs, ghcb, &syncout);
 
-	wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+	wrmsr(MSR_SEV_GHCB, curcpu()->ci_ghcb_paddr);
 
 	/* Call hypervisor. */
 	vmgexit();
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 8b82db6b4f6..cbfe817ea9c 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -590,6 +590,40 @@ KUENTRY(x2apic_eoi)
 	lfence
 END(x2apic_eoi)
 
+/*
+ * With SEV-ES the wrmsr instruction traps into the #VC handler which
+ * needs the kernel GS_BASE. So if we come from userland, we need to
+ * do swapgs. The fast IPI handlers do not perform swapgs, so we need
+ * to do it here. In order to detect whether we came from user or
+ * kernel mode, this function MUST be called before %rsp is modified.
+ */
+KUENTRY(x2apic_eoi_swapgs)
+	/* If we come from userland, swapgs so that curcpu() works */
+	testb	$SEL_RPL,16(%rsp)
+	jz	1f
+	swapgs
+	FENCE_SWAPGS_MIS_TAKEN
+1:
+	pushq	%rax
+	pushq	%rcx
+	pushq	%rdx
+	mov	$MSR_X2APIC_EOI,%ecx
+	mov	$0,%eax
+	mov	$0,%edx
+	wrmsr
+	popq	%rdx
+	popq	%rcx
+	popq	%rax
+
+	testb	$SEL_RPL,16(%rsp)
+	jz	2f
+	swapgs
+	FENCE_SWAPGS_MIS_TAKEN
+2:
+	retq
+	lfence
+END(x2apic_eoi_swapgs)
+
 #if NLAPIC > 0
 #ifdef MULTIPROCESSOR
 KIDTVEC(recurse_lapic_ipi)
@@ -629,9 +663,9 @@ END(Xresume_lapic_ipi)
  */
 /* invalidate the entire TLB, no PCIDs version */
 IDTVEC(ipi_invltlb)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	%cr3, %rax
 	movq	%rax, %cr3
@@ -646,11 +680,11 @@ END(Xipi_invltlb)
 #if NVMM > 0
 /* Invalidate VMX EPT */
 IDTVEC(ipi_invept)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	$ept_shoot_vid, %rax
 	movq	ept_shoot_mode, %rdx
 	invept	(%rax), %rdx
@@ -666,9 +700,9 @@ END(Xipi_invept)
 
 /* invalidate a single page, no PCIDs version */
 IDTVEC(ipi_invlpg)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	movq	tlb_shoot_addr1, %rax
 	invlpg	(%rax)
@@ -682,11 +716,11 @@ END(Xipi_invlpg)
 
 /* invalidate a range of pages, no PCIDs version */
 IDTVEC(ipi_invlrange)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 
-	ioapic_asm_ack()
-
 	movq	tlb_shoot_addr1, %rax
 	movq	tlb_shoot_addr2, %rdx
 1:	invlpg	(%rax)
@@ -706,9 +740,9 @@ END(Xipi_invlrange)
  * Invalidate the userspace PCIDs.
  */
 IDTVEC(ipi_invltlb_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* set the type */
 	movl	$INVPCID_PCID,%eax
@@ -740,9 +774,9 @@ END(Xipi_invltlb_pcid)
  * while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlpg_pcid)
-	pushq	%rax
+	ioapic_asm_ack_fast_ipi()
 
-	ioapic_asm_ack()
+	pushq	%rax
 
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
@@ -777,12 +811,12 @@ END(Xipi_invlpg_pcid)
  * PCIDs 0 and 1, while userspace VAs are present in PCIDs 1 and 2.
  */
 IDTVEC(ipi_invlrange_pcid)
+	ioapic_asm_ack_fast_ipi()
+
 	pushq	%rax
 	pushq	%rdx
 	pushq	%rcx
 
-	ioapic_asm_ack()
-
 	/* space for the INVPCID descriptor */
 	subq	$16,%rsp
 
@@ -817,7 +851,7 @@ IDTVEC(ipi_invlrange_pcid)
 END(Xipi_invlrange_pcid)
 
 IDTVEC(ipi_wbinvd)
-	ioapic_asm_ack()
+	ioapic_asm_ack_fast_ipi()
 
 	wbinvd
 
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
index 2ccb638a8e8..6b6bfee62e1 100644
--- a/sys/arch/amd64/include/codepatch.h
+++ b/sys/arch/amd64/include/codepatch.h
@@ -70,6 +70,7 @@ void codepatch_disable(void);
 #define CPTAG_RETPOLINE_R11		15
 #define CPTAG_RETPOLINE_R13		16
 #define CPTAG_IBPB_NOP			17
+#define CPTAG_EOI_FAST_IPI		18
 
 /*
  * stac/clac SMAP instructions have lfence like semantics.  Let's
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 8c71c424a8f..6b725ff796a 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -107,6 +107,7 @@ enum cpu_vendor {
  */
 struct x86_64_tss;
 struct vcpu;
+struct ghcb_sa;
 struct cpu_info {
 	/*
 	 * The beginning of this structure in mapped in the userspace "u-k"
@@ -219,6 +220,9 @@ struct cpu_info {
 	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
 #endif
 
+	struct ghcb_sa	*ci_ghcb;
+	paddr_t		ci_ghcb_paddr;
+
 	struct ksensordev	ci_sensordev;
 	struct ksensor		ci_sensor;
 	struct ksensor		ci_hz_sensor;
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index fb1de0cb1b1..5b2669a36aa 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -71,6 +71,7 @@ struct cpu_functions {
 };
 
 extern struct cpu_functions mp_cpu_funcs;
+extern struct cpu_functions mp_sev_es_cpu_funcs;
 
 #define CPU_ROLE_SP	0
 #define CPU_ROLE_BP	1
diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
index bac63968d24..a39d5a9401f 100644
--- a/sys/arch/amd64/include/ghcb.h
+++ b/sys/arch/amd64/include/ghcb.h
@@ -111,9 +111,6 @@ struct ghcb_sync {
 
 #ifndef _LOCORE
 
-extern vaddr_t ghcb_vaddr;
-extern paddr_t ghcb_paddr;
-
 struct ghcb_extra_regs {
 	uint64_t	 exitcode;
 	uint64_t	 exitinfo1;
@@ -136,6 +133,9 @@ void	ghcb_sync_in(struct trapframe *, struct ghcb_extra_regs *,
 	    struct ghcb_sa *, struct ghcb_sync *);
 void	_ghcb_mem_rw(vaddr_t, int, void *, bool);
 void	_ghcb_io_rw(uint16_t, int, uint32_t *, bool);
+#ifdef MULTIPROCESSOR
+int	ghcb_get_ap_jump_table(paddr_t *);
+#endif
 
 static inline uint8_t
 ghcb_mem_read_1(vaddr_t addr)
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index 99b22923499..3288176fb22 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -114,7 +114,21 @@
 
 #include <machine/codepatch.h>
 
-#define ioapic_asm_ack(num) 					 \
+/*
+ * This macro must also work if swapgs has not been called on entry
+ * from user land.
+ */
+#define ioapic_asm_ack_fast_ipi(num)				\
+	CODEPATCH_START						;\
+	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
+	CODEPATCH_END(CPTAG_EOI_FAST_IPI)
+
+
+/*
+ * This macro assumes that swapgs has already been called (e.g. by
+ * INTRENTRY).
+ */
+#define ioapic_asm_ack(num)					 \
 	CODEPATCH_START						;\
 	movl	$0,(local_apic+LAPIC_EOI)(%rip)			;\
 	CODEPATCH_END(CPTAG_EOI)
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 25b1618ad1f..23ee60eb465 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -271,6 +271,7 @@
  */
 #define SEV_VMGEXIT_MMIO_READ			0x80000001
 #define SEV_VMGEXIT_MMIO_WRITE			0x80000002
+#define SEV_VMGEXIT_AP_JUMP_TABLE		0x80000005
 
 #ifndef _LOCORE
 
diff --git a/sys/dev/acpi/acpimadt.c b/sys/dev/acpi/acpimadt.c
index 275f2b1e6ce..f9f3a0a6538 100644
--- a/sys/dev/acpi/acpimadt.c
+++ b/sys/dev/acpi/acpimadt.c
@@ -263,6 +263,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
 			caa.cpu_acpi_proc_id = entry->madt_lapic.acpi_proc_id;
 #ifdef MULTIPROCESSOR
 			caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+			if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+				caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
 #endif
 #ifdef __i386__
 			/*
@@ -318,6 +322,10 @@ acpimadt_attach(struct device *parent, struct device *self, void *aux)
 			caa.cpu_acpi_proc_id = entry->madt_x2apic.acpi_proc_uid;
 #ifdef MULTIPROCESSOR
 			caa.cpu_func = &mp_cpu_funcs;
+#ifdef __amd64__
+			if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+				caa.cpu_func = &mp_sev_es_cpu_funcs;
+#endif
 #endif
 #ifdef __i386__
 			/*