From: Alexander Bluhm
Subject: Re: SEV-ES guest: locore #VC trap handling
To: Mike Larkin
Cc: tech@openbsd.org
Date: Sun, 15 Jun 2025 16:55:12 +0200

On Sat, Jun 14, 2025 at 10:23:53PM -0700, Mike Larkin wrote:
> On Wed, May 21, 2025 at 05:10:27PM +0200, Hans-Jörg Höxer wrote:
> > Hi,
> >
> > this change deals with locore for SEV-ES enabled guests.  The approach
> > might be a bit controversial.  And it requires a diff for vmd(8), that
> > I've also attached, to simplify the discussion:
> >
> > SEV-ES guest: locore #VC trap handling
> >
> > When locore is executed by a SEV-ES enabled guest the first cpuid
> > instruction will raise a #VC trap that will need to be handled.
> > However, at that point in time the guest does not know whether it's
> > a guest at all, if it is running on an AMD cpu with SEV-ES enabled,
> > etc.
> >
> > To resolve this chicken-and-egg situation we unconditionally set up a
> >
> > As vmd(8) configures the runtime for locore to be in 32 bit
> > compatibility mode, a raised #VC exception will switch to long mode.
> > And the CPU will expect a 64 bit entry in the IDT.  When running
> > on e.g. KVM, locore is executed in 32 bit legacy mode.  There the
> > CPU will expect a 32 bit entry in the IDT.
> >
> > To accommodate both situations, we set up both 64 and 32 bit handlers
> > in the IDT.
> >
> > Additionally, vmd(8) has to set up a long mode segment in the GDT.
> >
> > Both #VC trap handlers use the MSR protocol to talk to the hypervisor
> > to emulate CPUID.  The MSR protocol only supports "simple" CPUID
> > without subfunctions.
> >
> > Note: When SEV-ES is enabled, the hypervisor cannot intercept
> > writes to EFER beforehand, only after the write.  Thus on vmm(4)
> > with a directly executed kernel we are in compatibility mode and
> > EFER_LMA is set.  As resetting EFER_LMA raises #GP we have to
> > preserve it.
> >
> > Take care,
> > HJ.
> >
>
> This one probably needs a lot of testing.  What tests have you guys done so
> far?  My SEV-ES machine is available now, I will test this and let you know
> but we probably want a lot of testing from different host/guest combos.

I know that this part does not break SEV (without ES) on vmm/vmd and
kvm/qemu.

Below is the complete diff that hshoexer@ sent some months ago, rebased
to current.  With that I can run an OpenBSD guest with SEV-ES in
vmm/vmd.  On kvm/qemu SEV still works, but I cannot get a guest running
with SEV-ES.

So if you agree with the locore0 part that was in the original mail of
this thread, I would like to commit it.

bluhm

Index: sys/arch/amd64/amd64/ghcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/ghcb.c,v
diff -u -p -r1.1 ghcb.c
--- sys/arch/amd64/amd64/ghcb.c	24 May 2025 12:47:00 -0000	1.1
+++ sys/arch/amd64/amd64/ghcb.c	15 Jun 2025 11:03:02 -0000
@@ -28,6 +28,9 @@ const uint64_t ghcb_sz_masks[] = {
 	0x00000000ffffffffULL,
 	0xffffffffffffffffULL
 };
+vaddr_t ghcb_vaddr;
+paddr_t ghcb_paddr;
+
 /*
  * ghcb_clear
  *
@@ -92,13 +95,28 @@ ghcb_valid(struct ghcb_sa *ghcb)
 /*
  * ghcb_verify_bm
  *
- * To be verified positive, the given expected bitmap must be at
- * least a subset of the provided valid bitmap.
- * Used by host and guest.
+ * To be verified positive, the given valid bitmap must exactly
+ * match the expected bitmap.
+ * Used by host only.
*/ int ghcb_verify_bm(uint8_t *valid_bm, uint8_t *expected_bm) { + return (memcmp(valid_bm, expected_bm, GHCB_VB_SZ)); +} + +/* + * ghcb_verify_bm_guest + * + * To be verified positive, the given expected bitmap must be at + * least a subset of the provided valid bitmap. This ensures, the + * host provides at least the information requested by the guest. + * Used by guest only. + * This is required for running on a Linux/KVM host. + */ +int +ghcb_verify_bm_guest(uint8_t *valid_bm, uint8_t *expected_bm) +{ return ((ghcb_valbm_isset(expected_bm, GHCB_RAX) && !ghcb_valbm_isset(valid_bm, GHCB_RAX)) || (ghcb_valbm_isset(expected_bm, GHCB_RBX) && @@ -107,18 +125,10 @@ ghcb_verify_bm(uint8_t *valid_bm, uint8_ !ghcb_valbm_isset(valid_bm, GHCB_RCX)) || (ghcb_valbm_isset(expected_bm, GHCB_RDX) && !ghcb_valbm_isset(valid_bm, GHCB_RDX)) || - (ghcb_valbm_isset(expected_bm, GHCB_SW_EXITCODE) && - !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITCODE)) || (ghcb_valbm_isset(expected_bm, GHCB_SW_EXITINFO1) && !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO1)) || (ghcb_valbm_isset(expected_bm, GHCB_SW_EXITINFO2) && - !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO2)) || - (ghcb_valbm_isset(expected_bm, GHCB_SW_SCRATCH) && - !ghcb_valbm_isset(valid_bm, GHCB_SW_SCRATCH)) || - (ghcb_valbm_isset(expected_bm, GHCB_XCR0) && - !ghcb_valbm_isset(valid_bm, GHCB_XCR0)) || - (ghcb_valbm_isset(expected_bm, GHCB_XSS) && - !ghcb_valbm_isset(valid_bm, GHCB_XSS))); + !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO2))); } /* Index: sys/arch/amd64/amd64/identcpu.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/identcpu.c,v diff -u -p -r1.150 identcpu.c --- sys/arch/amd64/amd64/identcpu.c 29 Apr 2025 20:19:48 -0000 1.150 +++ sys/arch/amd64/amd64/identcpu.c 15 Jun 2025 11:03:01 -0000 @@ -67,6 +67,7 @@ int cpuspeed; int amd64_has_xcrypt; int amd64_pos_cbit; /* C bit position for SEV */ +/* Minimum ASID value for an SEV enabled, SEV-ES disabled guest. */ int amd64_min_noes_asid; int has_rdrand; int has_rdseed; @@ -712,6 +713,10 @@ identifycpu(struct cpu_info *ci) CPUID_AMDSEV_EDX_BITS); amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f); amd64_min_noes_asid = ci->ci_feature_amdsev_edx; + if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci)) + printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname, + ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) ? + "-ES" : ""); } printf("\n"); Index: sys/arch/amd64/amd64/locore0.S =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/locore0.S,v diff -u -p -r1.27 locore0.S --- sys/arch/amd64/amd64/locore0.S 5 May 2025 23:02:39 -0000 1.27 +++ sys/arch/amd64/amd64/locore0.S 15 Jun 2025 11:03:01 -0000 @@ -111,6 +111,7 @@ #include #include #include +#include /* * override user-land alignment before including asm.h @@ -193,6 +194,58 @@ bi_size_ok: pushl $PSL_MBO popfl + /* + * Setup temporary #VC trap handler, in case we are running + * on an AMD CPU in SEV-ES guest mode. Will be reset by + * init_x86_64(). + * We are setting up two handlers: + * + * 1) locore_vc_trap32: Triggered when we are running in + * 32-bit legacy mode. + * + * 2) locore_vc_trap64: Triggered when we are running in + * 32-bit compatibility mode. + * + * The latter one is used by vmd(8). 
+ */ + movl $RELOC(early_idt), %ecx + movl $T_VC, %edx + leal (%ecx, %edx, 8), %ecx /* 32bit #VC IDT slot */ + + pushl %cs /* get current %cs */ + popl %ebx + shll $16, %ebx + + movl $RELOC(locore_vc_trap32), %eax + andl $0x0000ffff, %eax + orl %ebx, %eax /* use current %cs */ + movl %eax, (%ecx) + + movl $RELOC(locore_vc_trap32), %eax + andl $0xffff0000, %eax + orl $((0x80 | SDT_SYS386IGT) << 8), %eax + movl %eax, 4(%ecx) + + movl $RELOC(early_idt), %ecx + movl $(2 * T_VC), %edx + leal (%ecx, %edx, 8), %ecx /* 64bit #VC IDT slot */ + + movl $RELOC(locore_vc_trap64), %eax + andl $0x0000ffff, %eax + orl $(GSEL(3, SEL_KPL) << 16), %eax + movl %eax, (%ecx) + + movl $RELOC(locore_vc_trap64), %eax + andl $0xffff0000, %eax + orl $((0x80 | SDT_SYS386IGT) << 8), %eax + movl %eax, 4(%ecx) + xorl %eax, %eax + movl %eax, 8(%ecx) + movl %eax, 12(%ecx) + + movl $RELOC(idtlc), %eax + lidt (%eax) + /* Reset debug control registers */ xorl %eax,%eax movl %eax,%dr6 @@ -293,8 +346,9 @@ cont: /* Are we in guest mode with SEV enabled? */ movl $MSR_SEV_STATUS, %ecx rdmsr - andl $SEV_STAT_ENABLED, %eax + testl $SEV_STAT_ENABLED, %eax jz .Lno_sev + movl %eax, RELOC(cpu_sev_guestmode) /* we are a SEV guest */ /* Determine C bit position */ movl %ebx, %ecx /* %ebx from previous cpuid */ @@ -337,8 +391,6 @@ cont: andl %eax, RELOC(pg_frame + 4) /* apply mask */ andl %eax, RELOC(pg_lgframe + 4) - movl $0x1, RELOC(cpu_sev_guestmode) /* we are a SEV guest */ - .Lno_sev: /* @@ -384,7 +436,9 @@ cont: #define PROC0_DMP2_OFF (PROC0_DMP3_OFF + NDML3_ENTRIES * NBPG) #define TABLESIZE \ ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES + \ - NDML3_ENTRIES + NDML2_ENTRIES + 3) * NBPG) + NDML3_ENTRIES + NDML2_ENTRIES + 2 + 3) * NBPG) +#define PROC0_GHCB_OFF (TABLESIZE - 5 * NBPG) +#define GHCB_SIZE (2 * NBPG) #define fillkpt \ pushl %ebp ; /* save */ \ @@ -408,6 +462,17 @@ cont: loop 1b ; /* till finished */ \ popl %ebp + +#define fillkpt_nx_nc \ + pushl %ebp ; /* save */ \ + movl RELOC((pg_nx + 4)), %ebp ; /* NX bit? */ \ +1: movl %eax,(%ebx) ; /* store phys addr */ \ + movl %ebp,4(%ebx) ; /* upper 32 bits */ \ + addl $8,%ebx ; /* next pte/pde */ \ + addl $NBPG,%eax ; /* next phys page */ \ + loop 1b ; /* till finished */ \ + popl %ebp + /* Find end of kernel image. */ movl $RELOC(end),%edi #if (NKSYMS || defined(DDB)) @@ -514,6 +579,16 @@ map_tables: shrl $PGSHIFT,%ecx fillkpt_nx + /* Re-Map GHCB shared (ie. unencrypted) */ + /* XXX hshoexer: Only in SEV-ES guestmode. */ + pushl %ebx /* save current slot */ + subl $(5 << 3),%ebx /* move back to slot of GHCB */ + leal (PROC0_GHCB_OFF)(%esi),%eax + orl $(PG_V|PG_KW), %eax + movl $(GHCB_SIZE>>PGSHIFT), %ecx + fillkpt_nx_nc + popl %ebx /* continue with slot saved above */ + /* Map ISA I/O mem (later atdevbase) RW, NX */ movl $(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax movl $(IOM_SIZE>>PGSHIFT),%ecx @@ -631,7 +706,6 @@ store_pte: */ movl $MSR_EFER,%ecx rdmsr - xorl %eax,%eax /* XXX */ orl $(EFER_LME|EFER_SCE),%eax movl RELOC((pg_nx + 4)), %ebx cmpl $0, %ebx @@ -717,6 +791,12 @@ longmode_hi: addq %rsi,%rdx movq %rdx,atdevbase(%rip) + /* Relocate GHCB. */ + /* XXX hshoexer: Only in SEV-ES guestmode. 
*/ + movq $(PROC0_GHCB_OFF+KERNBASE),%rdx + addq %rsi,%rdx + movq %rdx,ghcb_vaddr(%rip) + /* Record start of symbols */ movq $__kernel_bss_end, ssym(%rip) @@ -739,12 +819,131 @@ longmode_hi: movw %ax,%fs leaq TABLESIZE(%rsi),%rdi + subq $(NBPG*2), %rdi subq $(NBPG*3), %rdi /* XXX merge these */ call init_x86_64 call main + /* MSR Protocol Request Codes */ +#define MSRPROTO_CPUID_REQ 0x4 +#define MSRPROTO_TERM_REQ 0x100 + +vc_cpuid64: + shll $30, %eax /* requested register */ + orl $MSRPROTO_CPUID_REQ, %eax + movl %ebx, %edx /* CPUID function */ + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall + rdmsr + ret + + .globl locore_vc_trap64 +locore_vc_trap64: + pushq %rax + pushq %rbx + pushq %rcx + pushq %rdx + +#define SVM_VMEXIT_CPUID 0x72 + cmpl $SVM_VMEXIT_CPUID, 32(%rsp) + jne .Lterminate64 + + movl %eax, %ebx /* save CPUID function */ + + movl $0, %eax /* request cpuid, get %eax */ + call vc_cpuid64 + movq %rdx, 24(%rsp) + + movl $1, %eax /* get %ebx */ + call vc_cpuid64 + movq %rdx, 16(%rsp) + + movl $2, %eax /* get %ecx */ + call vc_cpuid64 + movq %rdx, 8(%rsp) + + movl $3, %eax /* get %edx */ + call vc_cpuid64 + movq %rdx, 0(%rsp) + + popq %rdx + popq %rcx + popq %rbx + popq %rax + addq $8, %rsp + addq $2, (%rsp) + iretq + +.Lterminate64: + movl $MSRPROTO_TERM_REQ, %eax + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall +.Lterm_loop64: + hlt + jmp .Lterm_loop64 + + .code32 +vc_cpuid32: + shll $30, %eax /* requested register */ + orl $MSRPROTO_CPUID_REQ, %eax + movl %ebx, %edx /* CPUID function */ + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall + rdmsr + ret + + .globl locore_vc_trap32 +locore_vc_trap32: + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + +#define SVM_VMEXIT_CPUID 0x72 + cmpl $SVM_VMEXIT_CPUID, 16(%esp) + jne .Lterminate32 + + movl %eax, %ebx /* save CPUID function */ + + movl $0, %eax /* request cpuid, get %eax */ + call vc_cpuid32 + movl %edx, 12(%esp) + + movl $1, %eax /* get %ebx */ + call vc_cpuid32 + movl %edx, 8(%esp) + + movl $2, %eax /* get %ecx */ + call vc_cpuid32 + movl %edx, 4(%esp) + + movl $3, %eax /* get %edx */ + call vc_cpuid32 + movl %edx, 0(%esp) + + popl %edx + popl %ecx + popl %ebx + popl %eax + addl $4, %esp + addl $2, (%esp) + iret + +.Lterminate32: + movl $MSRPROTO_TERM_REQ, %eax + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall +.Lterm_loop32: + hlt + jmp .Lterm_loop32 + + .section .codepatch,"a" .align 8, 0xcc .globl codepatch_begin @@ -757,6 +956,20 @@ codepatch_end: .previous .data + .globl idtlc /* temporary locore IDT */ +idtlc: + .word early_idt_end-early_idt-1 + .long _RELOC(early_idt) + .align 64, 0xcc + + .globl early_idt +early_idt: + .rept NIDT + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .endr +early_idt_end: + .globl gdt64 gdt64: .word gdt64_end-gdt64_start-1 Index: sys/arch/amd64/amd64/machdep.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/machdep.c,v diff -u -p -r1.299 machdep.c --- sys/arch/amd64/amd64/machdep.c 21 May 2025 04:11:57 -0000 1.299 +++ sys/arch/amd64/amd64/machdep.c 15 Jun 2025 11:03:01 -0000 @@ -100,6 +100,7 @@ #include #include #include +#include #include #include @@ -491,6 +492,7 @@ bios_sysctl(int *name, u_int namelen, vo extern int tsc_is_invariant; extern int amd64_has_xcrypt; extern int need_retpoline; +extern int cpu_sev_guestmode; const struct sysctl_bounded_args cpuctl_vars[] = { { CPU_LIDACTION, &lid_action, -1, 2 }, @@ -500,6 +502,7 @@ const struct sysctl_bounded_args cpuctl_ { CPU_XCRYPT, &amd64_has_xcrypt, 
SYSCTL_INT_READONLY }, { CPU_INVARIANTTSC, &tsc_is_invariant, SYSCTL_INT_READONLY }, { CPU_RETPOLINE, &need_retpoline, SYSCTL_INT_READONLY }, + { CPU_SEVGUESTMODE, &cpu_sev_guestmode, SYSCTL_INT_READONLY }, }; /* @@ -1314,6 +1317,38 @@ cpu_init_idt(void) lidt(®ion); } +uint64_t early_gdt[GDT_SIZE / 8]; + +void +cpu_init_early_vctrap(paddr_t addr) +{ + struct region_descriptor region; + + extern struct region_descriptor gdt64; + extern struct gate_descriptor early_idt[NIDT]; + extern void Xvctrap_early(void); + + /* Setup temporary "early" longmode GDT, will be reset soon */ + memset(early_gdt, 0, sizeof(early_gdt)); + set_mem_segment(GDT_ADDR_MEM(early_gdt, GCODE_SEL), 0, 0xfffff, + SDT_MEMERA, SEL_KPL, 1, 0, 1); + set_mem_segment(GDT_ADDR_MEM(early_gdt, GDATA_SEL), 0, 0xfffff, + SDT_MEMRWA, SEL_KPL, 1, 0, 1); + setregion(®ion, early_gdt, GDT_SIZE - 1); + lgdt(®ion); + + /* Setup temporary "early" longmode #VC entry, will be reset soon */ + setgate(&early_idt[T_VC], Xvctrap_early, 0, SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setregion(®ion, early_idt, NIDT * sizeof(idt[0]) - 1); + lidt(®ion); + + /* Tell vmm(4) about our GHCB. */ + ghcb_paddr = addr; + memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE); + wrmsr(MSR_SEV_GHCB, ghcb_paddr); +} + void cpu_init_extents(void) { @@ -1433,6 +1468,13 @@ init_x86_64(paddr_t first_avail) bios_memmap_t *bmp; int x, ist; uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30; + + /* + * locore0 mapped 2 pages for use as GHCB before pmap is initialized. + */ + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) + cpu_init_early_vctrap(first_avail); + first_avail += 2 * NBPG; /* * locore0 mapped 3 pages for use before the pmap is initialized Index: sys/arch/amd64/amd64/trap.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/trap.c,v diff -u -p -r1.107 trap.c --- sys/arch/amd64/amd64/trap.c 5 May 2025 23:02:39 -0000 1.107 +++ sys/arch/amd64/amd64/trap.c 15 Jun 2025 11:03:01 -0000 @@ -86,6 +86,8 @@ #include #include #include +#include +#include #ifdef DDB #include #include @@ -95,6 +97,7 @@ int upageflttrap(struct trapframe *, uint64_t); int kpageflttrap(struct trapframe *, uint64_t); +int vctrap(struct trapframe *, int); void kerntrap(struct trapframe *); void usertrap(struct trapframe *); void ast(struct trapframe *); @@ -123,6 +126,7 @@ const char * const trap_type[] = { "SSE FP exception", /* 19 T_XMM */ "virtualization exception", /* 20 T_VE */ "control protection exception", /* 21 T_CP */ + "VMM communication exception", /* 29 T_VC */ }; const int trap_types = nitems(trap_type); @@ -297,6 +301,150 @@ kpageflttrap(struct trapframe *frame, ui return 1; } +int +vctrap(struct trapframe *frame, int user) +{ + uint64_t sw_exitcode, sw_exitinfo1, sw_exitinfo2; + uint8_t *rip = (uint8_t *)(frame->tf_rip); + uint16_t port; + struct ghcb_sync syncout, syncin; + struct ghcb_sa *ghcb; + + intr_disable(); + + memset(&syncout, 0, sizeof(syncout)); + memset(&syncin, 0, sizeof(syncin)); + + sw_exitcode = frame->tf_err; + sw_exitinfo1 = 0; + sw_exitinfo2 = 0; + + switch (sw_exitcode) { + case SVM_VMEXIT_CPUID: + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); + ghcb_sync_val(GHCB_RBX, GHCB_SZ32, &syncin); + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncin); + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin); + frame->tf_rip += 2; + break; + case SVM_VMEXIT_MSR: { + if (user) + return 0; /* 
not allowed from userspace */ + if (*rip == 0x0f && *(rip + 1) == 0x30) { + /* WRMSR */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncout); + sw_exitinfo1 = 1; + } else if (*rip == 0x0f && *(rip + 1) == 0x32) { + /* RDMSR */ + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin); + } else + panic("failed to decode MSR"); + frame->tf_rip += 2; + break; + } + case SVM_VMEXIT_IOIO: { + if (user) + return 0; /* not allowed from userspace */ + switch (*rip) { + case 0x66: { + switch (*(rip + 1)) { + case 0xef: /* out %ax,(%dx) */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncout); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | + (1ULL << 5); + frame->tf_rip += 2; + break; + case 0xed: /* in (%dx),%ax */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncin); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | + (1ULL << 5) | (1ULL << 0); + frame->tf_rip += 2; + break; + default: + panic("failed to decode prefixed IOIO"); + } + break; + } + case 0xe4: /* in $0x71,%al */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin); + port = *(rip + 1); + sw_exitinfo1 = (port << 16) | (1ULL << 4) | + (1ULL << 0); + frame->tf_rip += 2; + break; + case 0xe6: /* outb %al,$0x43 */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout); + port = *(rip + 1); + sw_exitinfo1 = (port << 16) | (1ULL << 4); + frame->tf_rip += 2; + break; + case 0xec: /* in (%dx),%al */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 4) | + (1ULL << 0); + frame->tf_rip += 1; + break; + case 0xed: /* in (%dx),%eax */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 6) | + (1ULL << 0); + frame->tf_rip += 1; + break; + case 0xee: /* out %al,(%dx) */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 4); + frame->tf_rip += 1; + break; + case 0xef: /* out %eax,(%dx) */ + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 6); + frame->tf_rip += 1; + break; + default: + panic("failed to decode IOIO"); + } + break; + } + default: + panic("invalid exit code 0x%llx", sw_exitcode); + } + + /* Always required */ + ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout); + ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout); + ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout); + + /* Sync out to GHCB */ + ghcb = (struct ghcb_sa *)ghcb_vaddr; + ghcb_sync_out(frame, sw_exitcode, sw_exitinfo1, sw_exitinfo2, ghcb, + &syncout); + + /* Call hypervisor. */ + vmgexit(); + + /* Verify response */ + if (ghcb_verify_bm_guest(ghcb->valid_bitmap, syncin.valid_bitmap)) { + ghcb_clear(ghcb); + panic("invalid hypervisor response"); + } + + /* Sync in from GHCB */ + ghcb_sync_in(frame, ghcb, &syncin); + + return 1; +} /* * kerntrap(frame): @@ -348,6 +496,11 @@ kerntrap(struct trapframe *frame) else return; #endif /* NISA > 0 */ + + case T_VC: + if (vctrap(frame, 0)) + return; + goto we_re_toast; } } @@ -427,7 +580,12 @@ usertrap(struct trapframe *frame) code = (frame->tf_err & 0x7fff) < 4 ? 
ILL_BTCFI : ILL_BADSTK; break; - + case T_VC: + if (vctrap(frame, 1)) + goto out; + sig = SIGILL; + code = ILL_PRVOPC; + break; case T_PAGEFLT: /* page fault */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n", Index: sys/arch/amd64/amd64/vector.S =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/vector.S,v diff -u -p -r1.100 vector.S --- sys/arch/amd64/amd64/vector.S 23 Apr 2025 15:08:05 -0000 1.100 +++ sys/arch/amd64/amd64/vector.S 15 Jun 2025 11:03:01 -0000 @@ -373,6 +373,43 @@ IDTVEC(trap14) ZTRAP(T_VE) IDTVEC(trap15) TRAP(T_CP) + +IDTVEC(trap1d) + /* + * #VC is AMD CPU specific, thus we don't use any Intel Meltdown + * workarounds. + * + * We handle #VC different from other traps, as we do not want + * to re-enable interrupts. #VC might happen during IRQ handling + * before a specific hardware interrupt gets masked. Re-enabling + * interrupts in the trap handler might cause nested IRQs of + * the same level. Thus keep interrupts disabled. + * + * On Intel CPUs we could use code patch to reset this entry. + */ + pushq $T_VC + testb $SEL_RPL,24(%rsp) + je vctrap_kern + swapgs + FENCE_SWAPGS_MIS_TAKEN + movq %rax,CPUVAR(SCRATCH) + + /* #VC from userspace */ + TRAP_ENTRY_USER + cld + SMAP_CLAC + /* shortcut to regular path, but with interrupts disabled */ + jmp recall_trap + + /* #VC from kernspace */ +vctrap_kern: + FENCE_NO_SAFE_SMAP + TRAP_ENTRY_KERN + cld + SMAP_CLAC + /* shortcut to regular path, but with interrupts disabled */ + jmp .Lreal_kern_trap + IDTVEC(trap1f) IDTVEC_ALIAS(trap16, trap1f) IDTVEC_ALIAS(trap17, trap1f) @@ -381,7 +418,6 @@ IDTVEC_ALIAS(trap19, trap1f) IDTVEC_ALIAS(trap1a, trap1f) IDTVEC_ALIAS(trap1b, trap1f) IDTVEC_ALIAS(trap1c, trap1f) -IDTVEC_ALIAS(trap1d, trap1f) IDTVEC_ALIAS(trap1e, trap1f) /* 22 - 31 reserved for future exp */ ZTRAP(T_RESERVED) @@ -513,6 +549,16 @@ END(alltraps_kern) END(alltraps_kern_meltdown) KTEXT_PAGE_END +/* #VC trap entry for early bootstrap */ +IDTVEC(vctrap_early) + pushq $T_VC + TRAP_ENTRY_KERN /* early #VC has to be in kernel mode */ + cld + movq %rsp, %rdi + movq $0x0, %rsi + call vctrap + movq $0,-8(%rsp) + INTRFASTEXIT /* * Macros for interrupt entry, call to handler, and exit. 
Index: sys/arch/amd64/amd64/vmm_machdep.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/vmm_machdep.c,v diff -u -p -r1.57 vmm_machdep.c --- sys/arch/amd64/amd64/vmm_machdep.c 3 Jun 2025 19:15:29 -0000 1.57 +++ sys/arch/amd64/amd64/vmm_machdep.c 15 Jun 2025 11:03:02 -0000 @@ -1588,15 +1588,15 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR | SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP; - /* With SEV-ES we cannot force access XCR0, thus no intercept */ - if (xsave_mask && !vcpu->vc_seves) + if (xsave_mask && !vcpu->vc_seves) /* XXX hshoexer */ vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV; if (vcpu->vc_seves) { - /* With SEV-ES also intercept post EFER and CR[04] writes */ + /* With SEV-ES also intercept post EFER and CR[048] writes */ vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE; vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST; vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST; + vmcb->v_intercept2 |= SVM_INTERCEPT_CR8_WRITE_POST; } /* Setup I/O bitmap */ @@ -1617,22 +1617,13 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s svm_setmsrbrw(vcpu, MSR_FSBASE); svm_setmsrbrw(vcpu, MSR_GSBASE); svm_setmsrbrw(vcpu, MSR_KERNELGSBASE); + svm_setmsrbrw(vcpu, MSR_SEV_GHCB); /* allow reading SEV status */ svm_setmsrbrw(vcpu, MSR_SEV_STATUS); if (vcpu->vc_seves) { - /* Allow read/write GHCB guest physical address */ - svm_setmsrbrw(vcpu, MSR_SEV_GHCB); - - /* Allow reading MSR_XSS; for CPUID Extended State Enum. */ - svm_setmsrbr(vcpu, MSR_XSS); - - /* - * With SEV-ES SVME can't be modified by the guest; - * host can only intercept post-write (see - * SVM_INTERCEPT_EFER_WRITE above). - */ + /* With SEV-ES SVME can not be modified by the guest */ svm_setmsrbrw(vcpu, MSR_EFER); } else { /* EFER is R/O so we can ensure the guest always has SVME */ @@ -1650,7 +1641,10 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s vmcb->v_asid = vcpu->vc_vpid; /* TLB Control - First time in, flush all*/ - vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL; + if (vcpu->vc_seves) + vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ASID; /* XXX hshoexer */ + else + vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL; /* INTR masking */ vmcb->v_intr_masking = 1; @@ -1676,13 +1670,23 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s /* Set VMSA. */ vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa; + + /* XXX hshoexer: LBR: guest_state_protected flag? */ + svm_setmsrbrw(vcpu, MSR_DEBUGCTLMSR); + svm_setmsrbrw(vcpu, MSR_LASTBRANCHFROMIP); + svm_setmsrbrw(vcpu, MSR_LASTBRANCHTOIP); + svm_setmsrbrw(vcpu, MSR_LASTINTFROMIP); + svm_setmsrbrw(vcpu, MSR_LASTINTTOIP); + + /* XXX hshoexer: virt vmload/vmsave */ + vmcb->v_lbr_virt_enable |= 0x2; } /* Enable SVME in EFER (must always be set) */ vmcb->v_efer |= EFER_SVME; if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0) - return ret; + goto exit; /* xcr0 power on default sets bit 0 (x87 state) */ vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask; @@ -1691,6 +1695,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s ret = vcpu_svm_init_vmsa(vcpu, vrs); +exit: return ret; } @@ -1709,6 +1714,9 @@ vcpu_svm_init_vmsa(struct vcpu *vcpu, st if (!vcpu->vc_seves) return 0; + if (vmcb->v_dr7 & ~0x00000400) /* XXX hshoexer? 
*/ + return 1; + vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va; memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout)); @@ -2889,6 +2897,28 @@ vcpu_init_svm(struct vcpu *vcpu, struct (uint64_t)vcpu->vc_svm_hsa_va, (uint64_t)vcpu->vc_svm_hsa_pa); + + /* Allocate VM save area VA */ + vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, + &kp_zero, &kd_waitok); + + if (!vcpu->vc_svm_vmsa_va) { + ret = ENOMEM; + goto exit; + } + + /* Compute VM save area PA */ + if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_vmsa_va, + &vcpu->vc_svm_vmsa_pa)) { + ret = ENOMEM; + goto exit; + } + + DPRINTF("%s: VMSA va @ 0x%llx, pa @ 0x%llx\n", __func__, + (uint64_t)vcpu->vc_svm_vmsa_va, + (uint64_t)vcpu->vc_svm_vmsa_pa); + + /* Allocate IOIO area VA (3 pages) */ vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any, &vmm_kp_contig, &kd_waitok); @@ -2909,27 +2939,9 @@ vcpu_init_svm(struct vcpu *vcpu, struct (uint64_t)vcpu->vc_svm_ioio_va, (uint64_t)vcpu->vc_svm_ioio_pa); - if (vcpu->vc_seves) { - /* Allocate VM save area VA */ - vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, - &kp_zero, &kd_waitok); - - if (!vcpu->vc_svm_vmsa_va) { - ret = ENOMEM; - goto exit; - } - - /* Compute VM save area PA */ - if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_vmsa_va, - &vcpu->vc_svm_vmsa_pa)) { - ret = ENOMEM; - goto exit; - } - - DPRINTF("%s: VMSA va @ 0x%llx, pa @ 0x%llx\n", __func__, - (uint64_t)vcpu->vc_svm_vmsa_va, - (uint64_t)vcpu->vc_svm_vmsa_pa); - } + /* Shall we enable SEV? */ + vcpu->vc_sev = vcp->vcp_sev; + vcpu->vc_seves = vcp->vcp_seves; /* Inform vmd(8) about ASID and C bit position. */ vcp->vcp_poscbit = amd64_pos_cbit; @@ -4285,6 +4297,7 @@ svm_handle_exit(struct vcpu *vcpu) case SVM_VMEXIT_EFER_WRITE_TRAP: case SVM_VMEXIT_CR0_WRITE_TRAP: case SVM_VMEXIT_CR4_WRITE_TRAP: + case SVM_VMEXIT_CR8_WRITE_TRAP: ret = svm_handle_efercr(vcpu, exit_reason); update_rip = 0; break; @@ -4330,8 +4343,10 @@ svm_vmgexit_sync_host(struct vcpu *vcpu) if (!vcpu->vc_seves) return (0); - if (vcpu->vc_svm_ghcb_va == 0) + if (vcpu->vc_svm_ghcb_va == 0) { + printf("%s: GHCB not set\n", __func__); return (0); + } ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va; if (!ghcb_valid(ghcb)) @@ -4608,6 +4623,8 @@ svm_handle_efercr(struct vcpu *vcpu, uin case SVM_VMEXIT_CR4_WRITE_TRAP: vmcb->v_cr4 = vmcb->v_exitinfo1; break; + /* XXX hshoexer: no state for CR8? */ + break; default: return (EINVAL); } @@ -6767,6 +6784,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct v * On exit, interrupts are disabled, and we are running with * the guest FPU state still possibly on the CPU. Save the FPU * state before re-enabling interrupts. + * + * XXX hshoexer: With SEV-ES we should be able to skip this. 
*/ vmm_fpusave(vcpu); @@ -7362,7 +7381,7 @@ svm_get_vmsa_pa(uint32_t vmid, uint32_t return (error); vcpu = vm_find_vcpu(vm, vcpuid); - if (vcpu == NULL || !vcpu->vc_seves) { + if (vcpu == NULL) { ret = ENOENT; goto out; } Index: sys/arch/amd64/include/cpu.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/cpu.h,v diff -u -p -r1.180 cpu.h --- sys/arch/amd64/include/cpu.h 28 Apr 2025 16:18:25 -0000 1.180 +++ sys/arch/amd64/include/cpu.h 15 Jun 2025 11:03:01 -0000 @@ -502,7 +502,8 @@ void mp_setperf_init(void); #define CPU_INVARIANTTSC 17 /* has invariant TSC */ #define CPU_PWRACTION 18 /* action caused by power button */ #define CPU_RETPOLINE 19 /* cpu requires retpoline pattern */ -#define CPU_MAXID 20 /* number of valid machdep ids */ +#define CPU_SEVGUESTMODE 20 /* running as SEV guest */ +#define CPU_MAXID 21 /* number of valid machdep ids */ #define CTL_MACHDEP_NAMES { \ { 0, 0 }, \ @@ -525,6 +526,7 @@ void mp_setperf_init(void); { "invarianttsc", CTLTYPE_INT }, \ { "pwraction", CTLTYPE_INT }, \ { "retpoline", CTLTYPE_INT }, \ + { "sevguestmode", CTLTYPE_INT}, \ } #endif /* !_MACHINE_CPU_H_ */ Index: sys/arch/amd64/include/cpufunc.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/cpufunc.h,v diff -u -p -r1.44 cpufunc.h --- sys/arch/amd64/include/cpufunc.h 5 May 2025 23:02:39 -0000 1.44 +++ sys/arch/amd64/include/cpufunc.h 15 Jun 2025 11:03:01 -0000 @@ -439,6 +439,27 @@ breakpoint(void) __asm volatile("int $3"); } +/* VMGEXIT */ +static __inline void +vmgexit(void) +{ + __asm volatile("rep; vmmcall"); +} + +/* Request VM termination from hypervisor. */ +static __inline void +vmterminate(void) +{ + __asm volatile( + " movl $MSRPROTO_TERM_REQ, %%rdx ;" + " movl $MSR_SEV_GHCB, %%rcx ;" + " wrmsr ;" + " rep vmmcall ;" + "1: hlt ;" + " jmp 1b ;" + : :); +} + void amd64_errata(struct cpu_info *); void cpu_ucode_setup(void); void cpu_ucode_apply(struct cpu_info *); Index: sys/arch/amd64/include/ghcb.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/ghcb.h,v diff -u -p -r1.2 ghcb.h --- sys/arch/amd64/include/ghcb.h 28 May 2025 07:59:05 -0000 1.2 +++ sys/arch/amd64/include/ghcb.h 15 Jun 2025 11:03:01 -0000 @@ -19,13 +19,10 @@ #ifndef _MACHINE_GHCB_H_ #define _MACHINE_GHCB_H_ -#include - #define GHCB_OFFSET(m) ((m) / 8) #define GHCB_IDX(m) (GHCB_OFFSET((m)) / 8) #define GHCB_BIT(m) (GHCB_OFFSET((m)) % 8) -#define GHCB_XSS 0x140 #define GHCB_RAX 0x1F8 #define GHCB_RBX 0x318 #define GHCB_RCX 0x308 @@ -33,8 +30,6 @@ #define GHCB_SW_EXITCODE 0x390 #define GHCB_SW_EXITINFO1 0x398 #define GHCB_SW_EXITINFO2 0x3A0 -#define GHCB_SW_SCRATCH 0x3A8 -#define GHCB_XCR0 0x3E8 #define GHCB_MAX 0xFFF @@ -104,11 +99,15 @@ struct ghcb_sync { #define MSR_PROTO_CPUID_RESP 0x5 #define MSR_PROTO_TERMINATE 0x100 +extern vaddr_t ghcb_vaddr; +extern paddr_t ghcb_paddr; + void ghcb_clear(struct ghcb_sa *); int ghcb_valbm_set(uint8_t *, int); int ghcb_valbm_isset(uint8_t *, int); -int ghcb_verify_bm(uint8_t *, uint8_t *); int ghcb_valid(struct ghcb_sa *); +int ghcb_verify_bm(uint8_t *, uint8_t *); +int ghcb_verify_bm_guest(uint8_t *, uint8_t *); void ghcb_sync_val(int, int, struct ghcb_sync *); void ghcb_sync_out(struct trapframe *, uint64_t, uint64_t, uint64_t, Index: sys/arch/amd64/include/specialreg.h =================================================================== RCS file: 
/data/mirror/openbsd/cvs/src/sys/arch/amd64/include/specialreg.h,v diff -u -p -r1.117 specialreg.h --- sys/arch/amd64/include/specialreg.h 19 May 2025 08:36:36 -0000 1.117 +++ sys/arch/amd64/include/specialreg.h 15 Jun 2025 11:03:01 -0000 @@ -729,6 +729,7 @@ #define MSR_SEV_STATUS 0xc0010131 #define SEV_STAT_ENABLED 0x00000001 +#define SEV_STAT_ES_ENABLED 0x00000002 #define MSR_LS_CFG 0xc0011020 #define LS_CFG_DIS_LS2_SQUISH 0x02000000 Index: sys/arch/amd64/include/trap.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/include/trap.h,v diff -u -p -r1.5 trap.h --- sys/arch/amd64/include/trap.h 15 Apr 2023 01:22:50 -0000 1.5 +++ sys/arch/amd64/include/trap.h 15 Jun 2025 11:03:01 -0000 @@ -62,3 +62,4 @@ #define T_XMM 19 /* SSE FP exception */ #define T_VE 20 /* virtualization exception */ #define T_CP 21 /* control protection exception */ +#define T_VC 29 /* VMM communication exception */ Index: sys/dev/ic/psp.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/ic/psp.c,v diff -u -p -r1.18 psp.c --- sys/dev/ic/psp.c 20 May 2025 07:02:20 -0000 1.18 +++ sys/dev/ic/psp.c 15 Jun 2025 11:03:01 -0000 @@ -861,6 +861,10 @@ pspioctl(dev_t dev, u_long cmd, caddr_t error = psp_launch_update_data(sc, (struct psp_launch_update_data *)data, p); break; + case PSP_IOC_LAUNCH_UPDATE_VMSA: + error = psp_launch_update_vmsa(sc, + (struct psp_launch_update_vmsa *)data); + break; case PSP_IOC_LAUNCH_MEASURE: error = psp_launch_measure(sc, (struct psp_launch_measure *)data); Index: sys/dev/ic/pspvar.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/ic/pspvar.h,v diff -u -p -r1.7 pspvar.h --- sys/dev/ic/pspvar.h 25 Apr 2025 19:10:50 -0000 1.7 +++ sys/dev/ic/pspvar.h 15 Jun 2025 11:03:01 -0000 @@ -275,6 +275,8 @@ struct psp_snp_platform_status { #define PSP_IOC_SNP_GET_PSTATUS _IOR('P', 11, struct psp_snp_platform_status) #define PSP_IOC_INIT _IO('P', 12) #define PSP_IOC_SHUTDOWN _IO('P', 13) +#define PSP_IOC_LAUNCH_UPDATE_VMSA \ + _IOW('P', 14, struct psp_launch_update_vmsa) #define PSP_IOC_ENCRYPT_STATE _IOW('P', 254, struct psp_encrypt_state) #define PSP_IOC_GUEST_SHUTDOWN _IOW('P', 255, struct psp_guest_shutdown) Index: sys/dev/vmm/vmm.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/dev/vmm/vmm.c,v diff -u -p -r1.5 vmm.c --- sys/dev/vmm/vmm.c 20 May 2025 13:51:27 -0000 1.5 +++ sys/dev/vmm/vmm.c 15 Jun 2025 11:03:01 -0000 @@ -455,6 +455,8 @@ vm_create(struct vm_create_params *vcp, vcpu->vc_parent = vm; vcpu->vc_id = vm->vm_vcpu_ct; vm->vm_vcpu_ct++; + vcpu->vc_sev = vcp->vcp_sev; + vcpu->vc_seves = vcp->vcp_seves; if ((ret = vcpu_init(vcpu, vcp)) != 0) { printf("failed to init vcpu %d for vm %p\n", i, vm); vm_teardown(&vm); Index: usr.sbin/vmd/loadfile_elf.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/vmd/loadfile_elf.c,v diff -u -p -r1.50 loadfile_elf.c --- usr.sbin/vmd/loadfile_elf.c 26 Sep 2024 01:45:13 -0000 1.50 +++ usr.sbin/vmd/loadfile_elf.c 15 Jun 2025 11:02:41 -0000 @@ -110,7 +110,7 @@ union { } hdr; static void setsegment(struct mem_segment_descriptor *, uint32_t, - size_t, int, int, int, int); + size_t, int, int, int, int, int); static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); static size_t 
create_bios_memmap(struct vm_create_params *, bios_memmap_t *); @@ -148,7 +148,7 @@ uint64_t pg_crypt = 0; */ static void setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, - int type, int dpl, int def32, int gran) + int type, int dpl, int def32, int gran, int lm) { sd->sd_lolimit = (int)limit; sd->sd_lobase = (int)base; @@ -157,7 +157,7 @@ setsegment(struct mem_segment_descriptor sd->sd_p = 1; sd->sd_hilimit = (int)limit >> 16; sd->sd_avl = 0; - sd->sd_long = 0; + sd->sd_long = lm; sd->sd_def32 = def32; sd->sd_gran = gran; sd->sd_hibase = (int)base >> 24; @@ -185,11 +185,13 @@ push_gdt(void) * Create three segment descriptors: * * GDT[0] : null descriptor. "Created" via memset above. - * GDT[1] (selector @ 0x8): Executable segment, for CS + * GDT[1] (selector @ 0x8): Executable segment (compat mode), for CS * GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS + * GDT[3] (selector @ 0x18): Executable segment (long mode), for CS */ - setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1); - setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); + setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1, 0); + setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1, 0); + setsegment(&sd[3], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 0, 1, 1); write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); sev_register_encryption(GDT_PAGE, PAGE_SIZE);
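For anyone trying to follow the locore part above: the MSR protocol CPUID
exchange implemented by vc_cpuid32/vc_cpuid64 boils down to the sketch
below.  This is only an illustration of the request/response encoding, not
part of the diff; the helper name sev_es_msr_cpuid() is made up, and
MSR_SEV_GHCB, wrmsr(), rdmsr() and vmgexit() stand in for the existing
kernel primitives from specialreg.h and cpufunc.h.

/*
 * Illustrative sketch only: "simple" CPUID via the GHCB MSR protocol,
 * as used by the locore #VC handlers.  One VMGEXIT per requested
 * register; reg is 0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX.
 */
#define MSRPROTO_CPUID_REQ	0x004ULL	/* GHCBInfo request code, bits 11:0 */
#define MSRPROTO_CPUID_RESP	0x005ULL	/* GHCBInfo response code, bits 11:0 */
#define MSRPROTO_TERM_REQ	0x100ULL	/* ask hypervisor to terminate guest */

static uint32_t
sev_es_msr_cpuid(uint32_t func, uint32_t reg)
{
	uint64_t req, resp;

	/* CPUID function in bits 63:32, requested register in bits 31:30. */
	req = ((uint64_t)func << 32) | ((uint64_t)reg << 30) |
	    MSRPROTO_CPUID_REQ;
	wrmsr(MSR_SEV_GHCB, req);
	vmgexit();				/* rep; vmmcall */
	resp = rdmsr(MSR_SEV_GHCB);

	if ((resp & 0xfff) != MSRPROTO_CPUID_RESP) {	/* locore skips this check */
		/* No usable answer; request termination like .Lterminate64. */
		wrmsr(MSR_SEV_GHCB, MSRPROTO_TERM_REQ);
		vmgexit();
		for (;;)
			;
	}

	/* The hypervisor returns the requested register value in bits 63:32. */
	return ((uint32_t)(resp >> 32));
}

Each CPUID leaf therefore costs four VMGEXITs during early boot; once
cpu_init_early_vctrap() has registered the real GHCB page with the
hypervisor, vctrap() takes over using the full GHCB protocol.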