Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
Re: cpu_xcall for amd64, was: rewrite wbinvd_on_all_cpus_acked() using cpu_xcall(9)
To:
Mark Kettenis <mark.kettenis@xs4all.nl>
Cc:
tech@openbsd.org
Date:
Sun, 9 Nov 2025 09:54:23 +1000

Download raw body.

Thread
On Sat, Nov 08, 2025 at 06:18:50PM +0100, Mark Kettenis wrote:
> > Date: Fri, 7 Nov 2025 20:20:53 +1000
> > From: David Gwynne <david@gwynne.id.au>
> > 
> > On Thu, Nov 06, 2025 at 11:02:00AM -0800, Mike Larkin wrote:
> > > On Thu, Nov 06, 2025 at 07:12:36PM +0100, hshoexer wrote:
> > > > Hi,
> > > >
> > > > On Thu, Nov 06, 2025 at 12:18:27PM -0500, Dave Voutila wrote:
> > > > > hshoexer <hshoexer@yerbouti.franken.de> writes:
> > > > >
> > > > > > Hi,
> > > > > >
> > > > > > I'd like to simplify the implementation of amd64's
> > > > > > wbinvd_on_all_cpus_acked() by using cpu_xcall(9).
> > > > > >
> > > > > > The diff below rewrites wbinvd_on_all_cpus_acked() accordingly.
> > > > > >
> > > > > > The required xcall glue code for amd64 is from this [1] mail by
> > > > > > dlg@.  If the wbinvd_on_all_cpus_acked() is ok, I'm fine with
> > > > > > splitting this diff.
> > > > > >
> > > > > > Thoughts?
> > > > > >
> > > > >
> > > > > I believe dlg@ has some changes he may not have shared that are not in
> > > > > tree that shuffle a bit of how things split across MI/MD.
> > > > >
> > > > > Given we're post-release, I think it's a good time to start that
> > > > > conversation again.
> > > >
> > > > excellent! The wbinvd rewrite is quite simple.  I'd be happy to use
> > > > it to test drive cpu_xcall.
> > > >
> > > 
> > > yeah, the xcall stuff has to get finished first then we can look at
> > > this diff (wbinvd)
> > 
> > ok.
> > 
> > the problem i found was that it's not safe to call the MI softintr
> > code from IPL_IPI, which the in tree version of cpu_xcall relies
> > on. to cope i've redrawn the line between the MD and MI code needed to
> > implement xcalls. the arch is now responsible for calling the xcall
> > dispatch code from IPL_SOFTCLOCK, rather than calling softintr
> > dispatch from IPL_IPI.
> 
> A few remarks about the new MD code below.
> 
> > this means there's more MD glue, which means it'll be harder to get an
> > MI xcall API in to the tree in one go. however, so far i've only found a
> > use for the xcall API on amd64 and arm64, so i don't think we need to
> > wait.
> 
> Yes, I don't immediately see a truly MI use case for these.

snip

> 
> > Index: arch/amd64/include/intrdefs.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
> > diff -u -p -r1.24 intrdefs.h
> > --- arch/amd64/include/intrdefs.h	26 May 2024 13:37:31 -0000	1.24
> > +++ arch/amd64/include/intrdefs.h	16 Sep 2025 12:07:26 -0000
> > @@ -55,9 +55,10 @@
> >  #define	SIR_CLOCK	61
> >  #define	SIR_NET		60
> >  #define	SIR_TTY		59
> > +#define	SIR_XCALL	58
> 
> I think these should be in order of decreasing priority.  So SIR_XCALL
> should be immediately before or immediately after SIR_CLOCK.

ok.

> This consumes an extra interrupt "slot" on each CPU.  And we're
> already short on those...
> 
> > -#define	LIR_XEN		58
> > -#define	LIR_HYPERV	57
> > +#define	LIR_XEN		57
> > +#define	LIR_HYPERV	56
> 
> But maybe we can reclaim these two slots on systems that don't need
> them?  That's a different diff though.

yeah.

so ok on this?

Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
diff -u -p -r1.200 cpu.c
--- arch/amd64/amd64/cpu.c	22 Sep 2025 13:19:03 -0000	1.200
+++ arch/amd64/amd64/cpu.c	8 Nov 2025 23:45:14 -0000
@@ -642,6 +638,8 @@ cpu_attach(struct device *parent, struct
 #endif
 
 #if defined(MULTIPROCESSOR)
+	cpu_xcall_establish(ci);
+
 	/*
 	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
 	 */
Index: arch/amd64/amd64/intr.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v
diff -u -p -r1.63 intr.c
--- arch/amd64/amd64/intr.c	11 Jun 2025 09:57:01 -0000	1.63
+++ arch/amd64/amd64/intr.c	8 Nov 2025 23:45:14 -0000
@@ -577,7 +589,10 @@ struct intrhand fake_softclock_intrhand;
 struct intrhand fake_softnet_intrhand;
 struct intrhand fake_softtty_intrhand;
 struct intrhand fake_timer_intrhand;
+#ifdef MULTIPROCESSOR
 struct intrhand fake_ipi_intrhand;
+struct intrhand fake_xcall_intrhand;
+#endif
 #if NXEN > 0
 struct intrhand fake_xen_intrhand;
 #endif
@@ -644,6 +661,16 @@ cpu_intr_init(struct cpu_info *ci)
 	isp->is_handlers = &fake_ipi_intrhand;
 	isp->is_pic = &local_pic;
 	ci->ci_isources[LIR_IPI] = isp;
+
+	isp = malloc(sizeof(*isp), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (isp == NULL)
+		panic("can't allocate fixed interrupt source");
+	isp->is_recurse = Xxcallintr;
+	isp->is_resume = Xxcallintr;
+	fake_xcall_intrhand.ih_level = IPL_SOFTCLOCK;
+	isp->is_handlers = &fake_xcall_intrhand;
+	isp->is_pic = &local_pic;
+	ci->ci_isources[SIR_XCALL] = isp;
 #endif
 #if NXEN > 0
 	isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO);
Index: arch/amd64/amd64/ipifuncs.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v
diff -u -p -r1.39 ipifuncs.c
--- arch/amd64/amd64/ipifuncs.c	7 Jun 2024 16:53:35 -0000	1.39
+++ arch/amd64/amd64/ipifuncs.c	8 Nov 2025 23:45:14 -0000
@@ -61,6 +61,7 @@
 void x86_64_ipi_nop(struct cpu_info *);
 void x86_64_ipi_halt(struct cpu_info *);
 void x86_64_ipi_wbinvd(struct cpu_info *);
+void x86_64_ipi_xcall(struct cpu_info *);
 
 #if NVMM > 0
 void x86_64_ipi_vmclear_vmm(struct cpu_info *);
@@ -108,6 +109,7 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf
 	NULL,
 #endif
 	x86_64_ipi_wbinvd,
+	x86_64_ipi_xcall,
 };
 
 void
@@ -169,3 +171,9 @@ x86_64_ipi_wbinvd(struct cpu_info *ci)
 {
 	wbinvd();
 }
+
+void
+x86_64_ipi_xcall(struct cpu_info *ci)
+{
+	x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << SIR_XCALL);
+}
Index: arch/amd64/amd64/vector.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v
diff -u -p -r1.103 vector.S
--- arch/amd64/amd64/vector.S	11 Jul 2025 20:04:20 -0000	1.103
+++ arch/amd64/amd64/vector.S	8 Nov 2025 23:45:14 -0000
@@ -1408,3 +1408,19 @@ KIDTVEC(softclock)
 	jmp	retpoline_r13
 	CODEPATCH_END(CPTAG_RETPOLINE_R13)
 END(Xsoftclock)
+
+#ifdef MULTIPROCESSOR
+KIDTVEC(xcallintr)
+	endbr64
+	movl	$IPL_SOFTCLOCK, CPUVAR(ILEVEL)
+	sti
+	incl	CPUVAR(IDEPTH)
+	movq	CPUVAR(SELF),%rdi
+	call	cpu_xcall_dispatch
+	movq	$0,-8(%rsp)
+	decl	CPUVAR(IDEPTH)
+	CODEPATCH_START
+	jmp	retpoline_r13
+	CODEPATCH_END(CPTAG_RETPOLINE_R13)
+END(Xxcallintr)
+#endif /* MULTIPROCESSOR */
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
diff -u -p -r1.181 cpu.h
--- arch/amd64/include/cpu.h	22 Oct 2025 14:11:23 -0000	1.181
+++ arch/amd64/include/cpu.h	8 Nov 2025 23:45:14 -0000
@@ -53,6 +53,7 @@
 #include <sys/sched.h>
 #include <sys/sensors.h>
 #include <sys/srp.h>
+#include <sys/xcall.h>
 #include <uvm/uvm_percpu.h>
 
 #ifdef _KERNEL
@@ -215,6 +216,7 @@ struct cpu_info {
 
 #ifdef MULTIPROCESSOR
 	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
+	struct xcall_cpu	ci_xcall;
 #define __HAVE_UVM_PERCPU
 	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
 #endif
Index: arch/amd64/include/intr.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v
diff -u -p -r1.36 intr.h
--- arch/amd64/include/intr.h	11 Jun 2025 09:57:01 -0000	1.36
+++ arch/amd64/include/intr.h	8 Nov 2025 23:45:14 -0000
@@ -222,7 +222,10 @@ void x86_ipi_handler(void);
 void x86_setperf_ipi(struct cpu_info *);
 
 extern void (*ipifunc[X86_NIPI])(struct cpu_info *);
-#endif
+
+#define cpu_xcall_ipi(_ci) x86_send_ipi((_ci), X86_IPI_XCALL)
+void Xxcallintr(void);
+#endif /* MULTIPROCESSOR */
 
 #endif /* !_LOCORE */
 
Index: arch/amd64/include/intrdefs.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v
diff -u -p -r1.24 intrdefs.h
--- arch/amd64/include/intrdefs.h	26 May 2024 13:37:31 -0000	1.24
+++ arch/amd64/include/intrdefs.h	8 Nov 2025 23:45:14 -0000
@@ -52,12 +52,13 @@
 #define LIR_TIMER	62
 
 /* Soft interrupt masks. */
-#define	SIR_CLOCK	61
-#define	SIR_NET		60
-#define	SIR_TTY		59
+#define	SIR_XCALL	61
+#define	SIR_CLOCK	60
+#define	SIR_NET		59
+#define	SIR_TTY		58
 
-#define	LIR_XEN		58
-#define	LIR_HYPERV	57
+#define	LIR_XEN		57
+#define	LIR_HYPERV	56
 
 /*
  * Maximum # of interrupt sources per CPU. 64 to fit in one word.
@@ -85,8 +86,9 @@
 #define X86_IPI_START_VMM		0x00000100
 #define X86_IPI_STOP_VMM		0x00000200
 #define X86_IPI_WBINVD			0x00000400
+#define X86_IPI_XCALL			0x00000800
 
-#define X86_NIPI			12
+#define X86_NIPI			13
 
 #define IREENT_MAGIC	0x18041969
 
Index: kern/kern_xcall.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_xcall.c,v
diff -u -p -r1.1 kern_xcall.c
--- kern/kern_xcall.c	13 Jul 2025 05:45:21 -0000	1.1
+++ kern/kern_xcall.c	8 Nov 2025 23:45:14 -0000
@@ -106,10 +106,10 @@ cpu_xcall_sync(struct cpu_info *ci, void
 /*
  * This is the glue between the MD IPI code and the calling of xcalls
  */
-static void
-cpu_xcall_handler(void *arg)
+void
+cpu_xcall_dispatch(struct cpu_info *ci)
 {
-	struct xcall_cpu *xci = arg;
+	struct xcall_cpu *xci = &ci->ci_xcall;
 	struct xcall *xc;
 	size_t i;
 
@@ -130,17 +130,6 @@ cpu_xcall_establish(struct cpu_info *ci)
 
 	for (i = 0; i < nitems(xci->xci_xcalls); i++)
 		xci->xci_xcalls[i] = NULL;
-
-	xci->xci_softintr = softintr_establish(IPL_XCALL | IPL_MPSAFE,
-	    cpu_xcall_handler, xci);
-}
-
-void
-cpu_xcall_dispatch(struct cpu_info *ci)
-{
-	struct xcall_cpu *xci = &ci->ci_xcall;
-
-	softintr_schedule(xci->xci_softintr);
 }
 
 #else /* MULTIPROCESSOR */
Index: sys/xcall.h
===================================================================
RCS file: /cvs/src/sys/sys/xcall.h,v
diff -u -p -r1.1 xcall.h
--- sys/xcall.h	13 Jul 2025 05:45:21 -0000	1.1
+++ sys/xcall.h	8 Nov 2025 23:45:14 -0000
@@ -45,7 +45,7 @@
  *
  * 4. cpu_xcall_ipi has to be provided by machine/intr.h.
  *
- * 5. The MD xcall IPI handler has to call cpu_xcall_dispatch.
+ * 5. call cpu_xcall_dispatch at IPL_SOFTCLOCK on the target CPU.
  */
 
 #ifndef _SYS_XCALL_H