Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Emulate CPU ID register access on arm64
To:
jca@openbsd.org, patrick@openbsd.org
Cc:
tech@openbsd.org, brad@comstyle.com
Date:
Sun, 14 Jul 2024 11:25:27 +0200

Download raw body.

Thread
As mentioned in the HWCAP discussion, new arm64 bits are typically no
longer assigned and instead the HWCAP_CPUID bit is used to signal that
the kernel emulates access to the CPU ID registers.  The diff below
implements this.

The architecture is clearly designed to make emulation possible and
easy.  There is a special trap for MSR access and it provides all the
details needed to emulate access without the need to read the
instruction.  The trapping from EL0 is probably done to let the OS
sanitize the values such that only features relevant to userland and
common to all CPU cores in the system are advertised.

There are some open questions though.  The diff is rather strict in
what access it emulates.  For now this is only the "known" ID_AA64_xxx
registers, that is the ID_AA64_xxx registers that are currently
defined by the architecture.  I think the architecture actually says
that the currently undefined ID_AA64_xxx registers should return zero,
and Linux allows access to them all.  But I would like to know when
userland processes actually start accessing those, so for now we'll
SIGILL.

I didn't implement emulation for the "32-bit" ID_xxx registers.  We
don't support executing 32-bit processes so these registers should be
irrelevant.  But they can be accessed using arm64 instructions, so for
now we'll continue to SIGILL these as well.

Then there are the MIDR_EL1 and MPIDR_EL1 registers.  Linux allows
access to these registers.  The problem with these is that their
values may depend on what CPU a process is executed on.  So in general
I don't think userland code should look at these.  However, some
userland code looks at these since certain optimizations might apply
only to specific CPU implementations.  On amd64, where the CPUID
instruction is available to userland, looking at the CPU family and
model bits is accepted practice.

Thoughts?


Index: arch/arm64/arm64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/autoconf.c,v
retrieving revision 1.14
diff -u -p -r1.14 autoconf.c
--- arch/arm64/arm64/autoconf.c	8 Sep 2022 10:22:06 -0000	1.14
+++ arch/arm64/arm64/autoconf.c	14 Jul 2024 09:23:04 -0000
@@ -61,6 +61,8 @@ cpu_configure(void)
 
 	unmap_startup();
 
+	cpu_identify_cleanup();
+
 #ifdef CRYPTO
 	if (arm64_has_aes)
 		cryptox_setup();
Index: arch/arm64/arm64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/cpu.c,v
retrieving revision 1.125
diff -u -p -r1.125 cpu.c
--- arch/arm64/arm64/cpu.c	11 Jul 2024 12:07:39 -0000	1.125
+++ arch/arm64/arm64/cpu.c	14 Jul 2024 09:23:04 -0000
@@ -1019,6 +1019,40 @@ cpu_identify(struct cpu_info *ci)
 #endif
 }
 
+void
+cpu_identify_cleanup(void)
+{
+	uint64_t value;
+
+	/* Sanitize in place.  ID_AA64ISAR0_EL1: masked fields minus TLB. */
+	value = cpu_id_aa64isar0 & ID_AA64ISAR0_MASK;
+	value &= ~ID_AA64ISAR0_TLB_MASK;
+	cpu_id_aa64isar0 = value;
+
+	/* ID_AA64ISAR1_EL1: masked fields minus SPECRES. */
+	value = cpu_id_aa64isar1 & ID_AA64ISAR1_MASK;
+	value &= ~ID_AA64ISAR1_SPECRES_MASK;
+	cpu_id_aa64isar1 = value;
+
+	/* ID_AA64ISAR2_EL1: masked fields minus CLRBHB. */
+	value = cpu_id_aa64isar2 & ID_AA64ISAR2_MASK;
+	value &= ~ID_AA64ISAR2_CLRBHB_MASK;
+	cpu_id_aa64isar2 = value;
+
+	/* ID_AA64PFR0_EL1: only FP, AdvSIMD and DIT are kept. */
+	value = 0;
+	value |= cpu_id_aa64pfr0 & ID_AA64PFR0_FP_MASK;
+	value |= cpu_id_aa64pfr0 & ID_AA64PFR0_ADV_SIMD_MASK;
+	value |= cpu_id_aa64pfr0 & ID_AA64PFR0_DIT_MASK;
+	cpu_id_aa64pfr0 = value;
+
+	/* ID_AA64PFR1_EL1: only BT and SSBS are kept. */
+	value = 0;
+	value |= cpu_id_aa64pfr1 & ID_AA64PFR1_BT_MASK;
+	value |= cpu_id_aa64pfr1 & ID_AA64PFR1_SSBS_MASK;
+	cpu_id_aa64pfr1 = value;
+}
+
 void	cpu_init(void);
 int	cpu_start_secondary(struct cpu_info *ci, int, uint64_t);
 int	cpu_clockspeed(int *);
Index: arch/arm64/arm64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/machdep.c,v
retrieving revision 1.90
diff -u -p -r1.90 machdep.c
--- arch/arm64/arm64/machdep.c	3 Jul 2024 21:04:04 -0000	1.90
+++ arch/arm64/arm64/machdep.c	14 Jul 2024 09:23:04 -0000
@@ -332,7 +332,6 @@ cpu_sysctl(int *name, u_int namelen, voi
 {
 	char *compatible;
 	int node, len, error;
-	uint64_t value;
 
 	/* all sysctl names at this level are terminal */
 	if (namelen != 1)
@@ -351,25 +350,15 @@ cpu_sysctl(int *name, u_int namelen, voi
 		free(compatible, M_TEMP, len);
 		return error;
 	case CPU_ID_AA64ISAR0:
-		value = cpu_id_aa64isar0 & ID_AA64ISAR0_MASK;
-		value &= ~ID_AA64ISAR0_TLB_MASK;
-		return sysctl_rdquad(oldp, oldlenp, newp, value);
+		return sysctl_rdquad(oldp, oldlenp, newp, cpu_id_aa64isar0);
 	case CPU_ID_AA64ISAR1:
-		value = cpu_id_aa64isar1 & ID_AA64ISAR1_MASK;
-		value &= ~ID_AA64ISAR1_SPECRES_MASK;
-		return sysctl_rdquad(oldp, oldlenp, newp, value);
+		return sysctl_rdquad(oldp, oldlenp, newp, cpu_id_aa64isar1);
+	case CPU_ID_AA64ISAR2:
+		return sysctl_rdquad(oldp, oldlenp, newp, cpu_id_aa64isar2);
 	case CPU_ID_AA64PFR0:
-		value = 0;
-		value |= cpu_id_aa64pfr0 & ID_AA64PFR0_FP_MASK;
-		value |= cpu_id_aa64pfr0 & ID_AA64PFR0_ADV_SIMD_MASK;
-		value |= cpu_id_aa64pfr0 & ID_AA64PFR0_DIT_MASK;
-		return sysctl_rdquad(oldp, oldlenp, newp, value);
+		return sysctl_rdquad(oldp, oldlenp, newp, cpu_id_aa64pfr0);
 	case CPU_ID_AA64PFR1:
-		value = 0;
-		value |= cpu_id_aa64pfr1 & ID_AA64PFR1_BT_MASK;
-		value |= cpu_id_aa64pfr1 & ID_AA64PFR1_SSBS_MASK;
-		return sysctl_rdquad(oldp, oldlenp, newp, value);
-	case CPU_ID_AA64ISAR2:
+		return sysctl_rdquad(oldp, oldlenp, newp, cpu_id_aa64pfr1);
 	case CPU_ID_AA64MMFR0:
 	case CPU_ID_AA64MMFR1:
 	case CPU_ID_AA64MMFR2:
Index: arch/arm64/arm64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
retrieving revision 1.48
diff -u -p -r1.48 trap.c
--- arch/arm64/arm64/trap.c	21 Feb 2024 15:53:07 -0000	1.48
+++ arch/arm64/arm64/trap.c	14 Jul 2024 09:23:04 -0000
@@ -187,6 +187,80 @@ kdata_abort(struct trapframe *frame, uin
 	}
 }
 
+static int
+emulate_msr(struct trapframe *frame, uint64_t esr)
+{
+	u_int rt = ISS_MSR_Rt(esr);	/* register that receives the value */
+	uint64_t val;
+
+	/* Only emulate reads; returning 0 means "not emulated". */
+	if ((esr & ISS_MSR_DIR) == 0)
+		return 0;
+
+	/* Only emulate non-debug System register access. */
+	if (ISS_MSR_OP0(esr) != 3 || ISS_MSR_OP1(esr) != 0 ||
+	    ISS_MSR_CRn(esr) != 0)
+		return 0;
+
+	switch (ISS_MSR_CRm(esr)) {
+	case 4:
+		switch (ISS_MSR_OP2(esr)) {
+		case 0:		/* ID_AA64PFR0_EL1 */
+			val = cpu_id_aa64pfr0;
+			break;
+		case 1:		/* ID_AA64PFR1_EL1 */
+			val = cpu_id_aa64pfr1;
+			break;
+		case 2:		/* ID_AA64PFR2_EL1 */
+		case 4:		/* ID_AA64ZFR0_EL1 */
+		case 5:		/* ID_AA64SMFR0_EL1 */
+			val = 0;	/* these always read as zero */
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case 6:
+		switch (ISS_MSR_OP2(esr)) {
+		case 0:	/* ID_AA64ISAR0_EL1 */
+			val = cpu_id_aa64isar0;
+			break;
+		case 1: /* ID_AA64ISAR1_EL1 */
+			val = cpu_id_aa64isar1;
+			break;
+		case 2: /* ID_AA64ISAR2_EL1 */
+			val = cpu_id_aa64isar2;
+			break;
+		default:
+			return 0;
+		}
+		break;
+	case 7:
+		switch (ISS_MSR_OP2(esr)) {
+		case 0: /* ID_AA64MMFR0_EL1 */
+		case 1: /* ID_AA64MMFR1_EL1 */
+		case 2: /* ID_AA64MMFR2_EL1 */
+		case 3: /* ID_AA64MMFR3_EL1 */
+		case 4: /* ID_AA64MMFR4_EL1 */
+			val = 0;	/* these always read as zero */
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	if (rt < 30)
+		frame->tf_x[rt] = val;
+	else if (rt == 30)	/* rt == 31 stores nothing */
+		frame->tf_lr = val;
+	frame->tf_elr += 4;	/* step over the trapped instruction */
+
+	return 1;
+}
+
 void
 do_el1h_sync(struct trapframe *frame)
 {
@@ -288,6 +362,10 @@ do_el0_sync(struct trapframe *frame)
 		sv.sival_ptr = (void *)frame->tf_elr;
 		trapsignal(p, SIGILL, esr, ILL_BTCFI, sv);
 		break;
+	case EXCP_MSR:
+		if (emulate_msr(frame, esr))
+			break;
+		/* FALLTHROUGH */
 	case EXCP_FPAC:
 		curcpu()->ci_flush_bp();
 		sv.sival_ptr = (void *)frame->tf_elr;
Index: arch/arm64/include/armreg.h
===================================================================
RCS file: /cvs/src/sys/arch/arm64/include/armreg.h,v
retrieving revision 1.35
diff -u -p -r1.35 armreg.h
--- arch/arm64/include/armreg.h	23 Jun 2024 10:17:16 -0000	1.35
+++ arch/arm64/include/armreg.h	14 Jul 2024 09:23:04 -0000
@@ -171,6 +171,26 @@
 #define	 ISS_DATA_DFSC_ECC_L3	(0x1f << 0)
 #define	 ISS_DATA_DFSC_ALIGN	(0x21 << 0)
 #define	 ISS_DATA_DFSC_TLB_CONFLICT (0x30 << 0)
+#define	 ISS_MSR_DIR_SHIFT	0	/* direction bit; set for reads */
+#define	 ISS_MSR_DIR		(0x01 << ISS_MSR_DIR_SHIFT)
+#define	 ISS_MSR_Rt_SHIFT	5	/* register used by the access */
+#define	 ISS_MSR_Rt_MASK	(0x1f << ISS_MSR_Rt_SHIFT)
+#define	 ISS_MSR_Rt(x)		(((x) & ISS_MSR_Rt_MASK) >> ISS_MSR_Rt_SHIFT)
+#define	 ISS_MSR_CRm_SHIFT	1
+#define	 ISS_MSR_CRm_MASK	(0xf << ISS_MSR_CRm_SHIFT)
+#define	 ISS_MSR_CRm(x)		(((x) & ISS_MSR_CRm_MASK) >> ISS_MSR_CRm_SHIFT)
+#define	 ISS_MSR_CRn_SHIFT	10
+#define	 ISS_MSR_CRn_MASK	(0xf << ISS_MSR_CRn_SHIFT)
+#define	 ISS_MSR_CRn(x)		(((x) & ISS_MSR_CRn_MASK) >> ISS_MSR_CRn_SHIFT)
+#define	 ISS_MSR_OP1_SHIFT	14
+#define	 ISS_MSR_OP1_MASK	(0x7 << ISS_MSR_OP1_SHIFT)
+#define	 ISS_MSR_OP1(x)		(((x) & ISS_MSR_OP1_MASK) >> ISS_MSR_OP1_SHIFT)
+#define	 ISS_MSR_OP2_SHIFT	17
+#define	 ISS_MSR_OP2_MASK	(0x7 << ISS_MSR_OP2_SHIFT)
+#define	 ISS_MSR_OP2(x)		(((x) & ISS_MSR_OP2_MASK) >> ISS_MSR_OP2_SHIFT)
+#define	 ISS_MSR_OP0_SHIFT	20
+#define	 ISS_MSR_OP0_MASK	(0x3 << ISS_MSR_OP0_SHIFT)
+#define	 ISS_MSR_OP0(x)		(((x) & ISS_MSR_OP0_MASK) >> ISS_MSR_OP0_SHIFT)
 #define	ESR_ELx_IL		(0x01 << 25)
 #define	ESR_ELx_EC_SHIFT	26
 #define	ESR_ELx_EC_MASK		(0x3f << 26)
Index: arch/arm64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/arm64/include/cpu.h,v
retrieving revision 1.48
diff -u -p -r1.48 cpu.h
--- arch/arm64/include/cpu.h	10 Jul 2024 11:01:24 -0000	1.48
+++ arch/arm64/include/cpu.h	14 Jul 2024 09:23:04 -0000
@@ -63,8 +63,11 @@
 
 extern uint64_t cpu_id_aa64isar0;
 extern uint64_t cpu_id_aa64isar1;
+extern uint64_t cpu_id_aa64isar2;
 extern uint64_t cpu_id_aa64pfr0;
 extern uint64_t cpu_id_aa64pfr1;
+
+void cpu_identify_cleanup(void);
 
 #include <machine/intr.h>
 #include <machine/frame.h>