Index | Thread | Search

From:
Mike Larkin <mlarkin@nested.page>
Subject:
Re: SEV-ES guest: Implement CPUID #VC, IOIO #VC, MSR #VC 3/3
To:
tech@openbsd.org
Date:
Thu, 26 Jun 2025 13:58:32 -0700

Download raw body.

Thread
On Thu, Jun 26, 2025 at 02:20:20PM +0200, Hans-Jörg Höxer wrote:
> Hi,
>
> this is change 3/3.  With this change, SEV-ES enabled guest can be run
> on vmm(4)/vmd(8).
>
> Fill in the handling of cpuid, in/out and rdmsr/wrmsr.  For the
> in/out and rdmsr/wrmsr we have to decode the actual instructions.
> Use the GHCB to request emulation of these instructions from vmm(4).
>
> Take care,
> Hans-Joerg
>

See below for one request, ok mlarkin in any case.

-ml

> --
> commit dbe7557824d97848daf41c65ea0cef92a153ad50
> Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> Date:   Thu Nov 21 15:57:33 2024 +0100
>
>     SEV-ES guest: Implement CPUID #VC, IOIO #VC, MSR #VC
>
>     Fill in the handling of cpuid, in/out and rdmsr/wrmsr.  For the
>     in/out and rdmsr/wrmsr we have to decode the actual instructions.
>     Use the GHCB to request emulation of these instructions from vmm(4).
>
>     With this change, SEV-ES enabled guest are able to run on vmm(4)/vmd(8).
>
> diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
> index bb445a60894..37b0dfbe8ba 100644
> --- a/sys/arch/amd64/amd64/trap.c
> +++ b/sys/arch/amd64/amd64/trap.c
> @@ -305,6 +305,8 @@ int
>  vctrap(struct trapframe *frame)
>  {
>  	uint64_t	 sw_exitcode, sw_exitinfo1, sw_exitinfo2;
> +	uint8_t		*rip = (uint8_t *)(frame->tf_rip);
> +	uint16_t	 port;
>  	struct ghcb_sync syncout, syncin;
>  	struct ghcb_sa	*ghcb;
>
> @@ -318,6 +320,99 @@ vctrap(struct trapframe *frame)
>  	sw_exitinfo2 = 0;
>
>  	switch (sw_exitcode) {
> +	case SVM_VMEXIT_CPUID:
> +		ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
> +		ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
> +		ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
> +		ghcb_sync_val(GHCB_RBX, GHCB_SZ32, &syncin);
> +		ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncin);
> +		ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin);
> +		frame->tf_rip += 2;
> +		break;
> +	case SVM_VMEXIT_MSR: {
> +		if (*rip == 0x0f && *(rip + 1) == 0x30) {

Maybe it's just me but it took me a few minutes to wrap my head around this
when I read it for the first time in that earlier version of the diff.

For posterity, can we add something like this (somewhere, maybe above the
switch?) :

	/*
	 * The #VC trap occurs when the guest (us) performs an operation which
	 * requires sharing data with the host. In order to ascertain
	 * which instruction caused the #VC, examine the instruction by
	 * reading %rip, Then, sync the appropriate values out (to the host),
	 * perform VMGEXIT to request that the host handle the operation which
	 * caused the #VC, then sync the returned values back in (from
	 * the host).
	 */

> +			/* WRMSR */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
> +			ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
> +			ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncout);
> +			sw_exitinfo1 = 1;
> +		} else if (*rip == 0x0f && *(rip + 1) == 0x32) {
> +			/* RDMSR */
> +			ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
> +			ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin);
> +		} else
> +			panic("failed to decode MSR");
> +		frame->tf_rip += 2;
> +		break;
> +	    }
> +	case SVM_VMEXIT_IOIO: {
> +		switch (*rip) {
> +		case 0x66: {
> +			switch (*(rip + 1)) {
> +			case 0xef:	/* out %ax,(%dx) */
> +				ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncout);
> +				port = (uint16_t)frame->tf_rdx;
> +				sw_exitinfo1 = (port << 16) |
> +				    (1ULL << 5);
> +				frame->tf_rip += 2;
> +				break;
> +			case 0xed:	/* in (%dx),%ax */
> +				ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncin);
> +				port = (uint16_t)frame->tf_rdx;
> +				sw_exitinfo1 = (port << 16) |
> +				    (1ULL << 5) | (1ULL << 0);
> +				frame->tf_rip += 2;
> +				break;
> +			default:
> +				panic("failed to decode prefixed IOIO");
> +			}
> +			break;
> +		    }
> +		case 0xe4:	/* in $port,%al */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin);
> +			port = *(rip + 1);
> +			sw_exitinfo1 = (port << 16) | (1ULL << 4) |
> +			    (1ULL << 0);
> +			frame->tf_rip += 2;
> +			break;
> +		case 0xe6:	/* outb %al,$port */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout);
> +			port = *(rip + 1);
> +			sw_exitinfo1 = (port << 16) | (1ULL << 4);
> +			frame->tf_rip += 2;
> +			break;
> +		case 0xec:	/* in (%dx),%al */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin);
> +			port = (uint16_t)frame->tf_rdx;
> +			sw_exitinfo1 = (port << 16) | (1ULL << 4) |
> +			    (1ULL << 0);
> +			frame->tf_rip += 1;
> +			break;
> +		case 0xed:	/* in (%dx),%eax */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
> +			port = (uint16_t)frame->tf_rdx;
> +			sw_exitinfo1 = (port << 16) | (1ULL << 6) |
> +			    (1ULL << 0);
> +			frame->tf_rip += 1;
> +			break;
> +		case 0xee:	/* out %al,(%dx) */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout);
> +			port = (uint16_t)frame->tf_rdx;
> +			sw_exitinfo1 = (port << 16) | (1ULL << 4);
> +			frame->tf_rip += 1;
> +			break;
> +		case 0xef:	/* out %eax,(%dx) */
> +			ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
> +			port = (uint16_t)frame->tf_rdx;
> +			sw_exitinfo1 = (port << 16) | (1ULL << 6);
> +			frame->tf_rip += 1;
> +			break;
> +		default:
> +			panic("failed to decode IOIO");
> +		}
> +		break;
> +	    }
>  	default:
>  		panic("invalid exit code 0x%llx", sw_exitcode);
>  	}