From: Mike Larkin Subject: Re: SEV-ES guest: Implement CPUID #VC, IOIO #VC, MSR #VC 3/3 To: tech@openbsd.org Date: Thu, 26 Jun 2025 13:58:32 -0700 On Thu, Jun 26, 2025 at 02:20:20PM +0200, Hans-Jörg Höxer wrote: > Hi, > > this is change 3/3. With this change, SEV-ES enabled guest can be run > on vmm(4)/vmd(8). > > Fill in the handling of cpuid, in/out and rdmsr/wrmsr. For the > in/out and rdmsr/wrmsr we have to decode the actual instructions. > Use the GHCB to request emulation of these instructions from vmm(4). > > Take care, > Hans-Joerg > See below for one request, ok mlarkin in any case. -ml > -- > commit dbe7557824d97848daf41c65ea0cef92a153ad50 > Author: Hans-Joerg Hoexer > Date: Thu Nov 21 15:57:33 2024 +0100 > > SEV-ES guest: Implement CPUID #VC, IOIO #VC, MSR #VC > > Fill in the handling of cpuid, in/out and rdmsr/wrmsr. For the > in/out and rdmsr/wrmsr we have to decode the actual instructions. > Use the GHCB to request emulation of these instructions from vmm(4). > > With this change, SEV-ES enabled guest are able to run on vmm(4)/vmd(8). > > diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c > index bb445a60894..37b0dfbe8ba 100644 > --- a/sys/arch/amd64/amd64/trap.c > +++ b/sys/arch/amd64/amd64/trap.c > @@ -305,6 +305,8 @@ int > vctrap(struct trapframe *frame) > { > uint64_t sw_exitcode, sw_exitinfo1, sw_exitinfo2; > + uint8_t *rip = (uint8_t *)(frame->tf_rip); > + uint16_t port; > struct ghcb_sync syncout, syncin; > struct ghcb_sa *ghcb; > > @@ -318,6 +320,99 @@ vctrap(struct trapframe *frame) > sw_exitinfo2 = 0; > > switch (sw_exitcode) { > + case SVM_VMEXIT_CPUID: > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); > + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); > + ghcb_sync_val(GHCB_RBX, GHCB_SZ32, &syncin); > + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncin); > + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin); > + frame->tf_rip += 2; > + break; > + case SVM_VMEXIT_MSR: { > + if (*rip == 0x0f && *(rip + 1) == 0x30) { Maybe it's just me but it took me a few minutes to wrap my head around this when I read it for the first time in that earlier version of the diff. For posterity, can we add something like this (somewhere, maybe above the switch?) : /* * The #VC trap occurs when the guest (us) performs an operation which * requires sharing data with the host. In order to ascertain * which instruction caused the #VC, examine the instruction by * reading %rip, Then, sync the appropriate values out (to the host), * perform VMGEXIT to request that the host handle the operation which * caused the #VC, then sync the returned values back in (from * the host). */ > + /* WRMSR */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); > + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); > + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncout); > + sw_exitinfo1 = 1; > + } else if (*rip == 0x0f && *(rip + 1) == 0x32) { > + /* RDMSR */ > + ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout); > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); > + ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin); > + } else > + panic("failed to decode MSR"); > + frame->tf_rip += 2; > + break; > + } > + case SVM_VMEXIT_IOIO: { > + switch (*rip) { > + case 0x66: { > + switch (*(rip + 1)) { > + case 0xef: /* out %ax,(%dx) */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncout); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | > + (1ULL << 5); > + frame->tf_rip += 2; > + break; > + case 0xed: /* in (%dx),%ax */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncin); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | > + (1ULL << 5) | (1ULL << 0); > + frame->tf_rip += 2; > + break; > + default: > + panic("failed to decode prefixed IOIO"); > + } > + break; > + } > + case 0xe4: /* in $port,%al */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin); > + port = *(rip + 1); > + sw_exitinfo1 = (port << 16) | (1ULL << 4) | > + (1ULL << 0); > + frame->tf_rip += 2; > + break; > + case 0xe6: /* outb %al,$port */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout); > + port = *(rip + 1); > + sw_exitinfo1 = (port << 16) | (1ULL << 4); > + frame->tf_rip += 2; > + break; > + case 0xec: /* in (%dx),%al */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | (1ULL << 4) | > + (1ULL << 0); > + frame->tf_rip += 1; > + break; > + case 0xed: /* in (%dx),%eax */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | (1ULL << 6) | > + (1ULL << 0); > + frame->tf_rip += 1; > + break; > + case 0xee: /* out %al,(%dx) */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | (1ULL << 4); > + frame->tf_rip += 1; > + break; > + case 0xef: /* out %eax,(%dx) */ > + ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout); > + port = (uint16_t)frame->tf_rdx; > + sw_exitinfo1 = (port << 16) | (1ULL << 6); > + frame->tf_rip += 1; > + break; > + default: > + panic("failed to decode IOIO"); > + } > + break; > + } > default: > panic("invalid exit code 0x%llx", sw_exitcode); > }