Index | Thread | Search

From:
Dave Voutila <dv@sisu.io>
Subject:
Re: rewrite wbinvd_on_all_cpus_acked() using cpu_xcall(9)
To:
tech@openbsd.org
Cc:
dlg@openbsd.org
Date:
Thu, 06 Nov 2025 12:18:27 -0500

Download raw body.

Thread
hshoexer <hshoexer@yerbouti.franken.de> writes:

> Hi,
>
> I'd like to simplify the implementation of amd64's
> wbinvd_on_all_cpus_acked() by using cpu_xcall(9).
>
> The diff below rewrites wbinvd_on_all_cpus_acked() accordingly.
>
> The required xcall glue code for amd64 is from this [1] mail by
> dlg@.  If the wbinvd_on_all_cpus_acked() is ok, I'm fine with
> splitting this diff.
>
> Thoughts?
>

I believe dlg@ has some changes he may not have shared that are not in
tree that shuffle a bit of how things split across MI/MD.

Given we're post-release, I think it's a good time to start that
conversation again.

> Take care,
> HJ.
>
> [1] https://marc.info/?l=openbsd-tech&m=175246988124240
> -----------------------------------------------------------------
> commit 394bdebb36b1c9699343896fa2091cd1f52be6e3
> Author: hshoexer <hshoexer@yerbouti.franken.de>
> Date:   Thu Nov 6 15:01:52 2025 +0100
>
>     rewrite wbinvd_on_all_cpus_acked() using cpu_xcall(9)
>
>     change includes required amd64 glue code by dlg@ as seen here:
>     https://marc.info/?l=openbsd-tech&m=175246988124240
>
> diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
> index 5baf179a398..93f554cb652 100644
> --- a/sys/arch/amd64/amd64/cpu.c
> +++ b/sys/arch/amd64/amd64/cpu.c
> @@ -79,6 +79,7 @@
>  #include <sys/memrange.h>
>  #include <sys/atomic.h>
>  #include <sys/user.h>
> +#include <sys/xcall.h>
>
>  #include <uvm/uvm_extern.h>
>
> @@ -642,6 +643,8 @@ cpu_attach(struct device *parent, struct device *self, void *aux)
>  #endif
>
>  #if defined(MULTIPROCESSOR)
> +	cpu_xcall_establish(ci);
> +
>  	/*
>  	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
>  	 */
> @@ -1475,44 +1478,23 @@ wbinvd_on_all_cpus(void)
>  	return 0;
>  }
>
> -volatile long wbinvd_wait __attribute__((section(".kudata")));
> +static void
> +xcwbinvd(void *arg)
> +{
> +	wbinvd();
> +}
>
>  void
>  wbinvd_on_all_cpus_acked(void)
>  {
> -	struct cpu_info *ci, *self = curcpu();;
> -	CPU_INFO_ITERATOR cii;
> -	long wait = 0;
> -	u_int64_t mask = 0;
> -	int s;
> +	struct cpu_info		*ci;
> +	CPU_INFO_ITERATOR	 cii;
>
>  	CPU_INFO_FOREACH(cii, ci) {
> -		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
> +		if (!(ci->ci_flags & CPUF_RUNNING))
>  			continue;
> -		mask |= (1ULL << ci->ci_cpuid);
> -		wait++;
> -	}
> -
> -	if (wait > 0) {
> -		s = splvm();
> -		while (atomic_cas_ulong(&wbinvd_wait, 0 , wait) != 0) {
> -			while (wbinvd_wait != 0)
> -				CPU_BUSY_CYCLE();
> -		}
> -
> -		CPU_INFO_FOREACH(cii, ci) {
> -			if ((mask & (1ULL << ci->ci_cpuid)) == 0)
> -				continue;
> -			if (x86_fast_ipi(ci, LAPIC_IPI_WBINVD) != 0)
> -				panic("%s: ipi failed", __func__);
> -		}
> -		splx(s);
> +		cpu_xcall_sync(ci, xcwbinvd, NULL, "xcwbinvd");
>  	}
> -
> -	wbinvd();
> -
> -	while (wbinvd_wait != 0)
> -		CPU_BUSY_CYCLE();
>  }
>  #endif /* MULTIPROCESSOR */
>
> diff --git a/sys/arch/amd64/amd64/ipifuncs.c b/sys/arch/amd64/amd64/ipifuncs.c
> index 482fc9eebb4..8a068278fc2 100644
> --- a/sys/arch/amd64/amd64/ipifuncs.c
> +++ b/sys/arch/amd64/amd64/ipifuncs.c
> @@ -108,6 +108,7 @@ void (*ipifunc[X86_NIPI])(struct cpu_info *) =
>  	NULL,
>  #endif
>  	x86_64_ipi_wbinvd,
> +	cpu_xcall_dispatch,
>  };
>
>  void
> diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
> index f7fdb81ccca..ccf743303f2 100644
> --- a/sys/arch/amd64/amd64/lapic.c
> +++ b/sys/arch/amd64/amd64/lapic.c
> @@ -364,8 +364,6 @@ lapic_boot_init(paddr_t lapic_base)
>  		idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_pcid);
>  		idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_pcid);
>  	}
> -	idt_allocmap[LAPIC_IPI_WBINVD] = 1;
> -	idt_vec_set(LAPIC_IPI_WBINVD, Xipi_wbinvd);
>  #if NVMM > 0
>  	idt_allocmap[LAPIC_IPI_INVEPT] = 1;
>  	idt_vec_set(LAPIC_IPI_INVEPT, Xipi_invept);
> diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
> index 8b82db6b4f6..c531c9f827c 100644
> --- a/sys/arch/amd64/amd64/vector.S
> +++ b/sys/arch/amd64/amd64/vector.S
> @@ -816,17 +816,6 @@ IDTVEC(ipi_invlrange_pcid)
>  	iretq
>  END(Xipi_invlrange_pcid)
>
> -IDTVEC(ipi_wbinvd)
> -	ioapic_asm_ack()
> -
> -	wbinvd
> -
> -	lock
> -	decq	wbinvd_wait
> -
> -	iretq
> -END(Xipi_wbinvd)
> -
>  #endif /* MULTIPROCESSOR */
>
>  	/*
> diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
> index 630de36a01d..38d5796f650 100644
> --- a/sys/arch/amd64/conf/files.amd64
> +++ b/sys/arch/amd64/conf/files.amd64
> @@ -95,7 +95,7 @@ file	arch/amd64/amd64/mpbios.c		mpbios needs-flag
>  file	arch/amd64/amd64/mpbios_intr_fixup.c	mpbios & pci
>
>  define	cpu {[apid = -1]}
> -device	cpu
> +device	cpu: xcall
>  attach	cpu at mainbus
>  file	arch/amd64/amd64/cpu.c	cpu
>
> diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
> index b125f1c4d50..f3b8ec80d96 100644
> --- a/sys/arch/amd64/include/cpu.h
> +++ b/sys/arch/amd64/include/cpu.h
> @@ -53,6 +53,7 @@
>  #include <sys/sched.h>
>  #include <sys/sensors.h>
>  #include <sys/srp.h>
> +#include <sys/xcall.h>
>  #include <uvm/uvm_percpu.h>
>
>  #ifdef _KERNEL
> @@ -215,6 +216,7 @@ struct cpu_info {
>
>  #ifdef MULTIPROCESSOR
>  	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
> +	struct xcall_cpu	ci_xcall;
>  #define __HAVE_UVM_PERCPU
>  	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
>  #endif
> diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
> index 561efa3fbd7..95dfff5173d 100644
> --- a/sys/arch/amd64/include/i82489var.h
> +++ b/sys/arch/amd64/include/i82489var.h
> @@ -72,8 +72,7 @@ extern void Xresume_lapic_ipi(void);
>  #define LAPIC_IPI_INVLTLB			(LAPIC_IPI_OFFSET + 0)
>  #define LAPIC_IPI_INVLPG			(LAPIC_IPI_OFFSET + 1)
>  #define LAPIC_IPI_INVLRANGE			(LAPIC_IPI_OFFSET + 2)
> -#define LAPIC_IPI_WBINVD			(LAPIC_IPI_OFFSET + 3)
> -#define LAPIC_IPI_INVEPT			(LAPIC_IPI_OFFSET + 4)
> +#define LAPIC_IPI_INVEPT			(LAPIC_IPI_OFFSET + 3)
>
>  extern void Xipi_invltlb(void);
>  extern void Xipi_invltlb_pcid(void);
> @@ -81,7 +80,6 @@ extern void Xipi_invlpg(void);
>  extern void Xipi_invlpg_pcid(void);
>  extern void Xipi_invlrange(void);
>  extern void Xipi_invlrange_pcid(void);
> -extern void Xipi_wbinvd(void);
>  #if NVMM > 0
>  extern void Xipi_invept(void);
>  #endif /* NVMM > 0 */
> diff --git a/sys/arch/amd64/include/intr.h b/sys/arch/amd64/include/intr.h
> index c0e12824c2d..ffdff36ca38 100644
> --- a/sys/arch/amd64/include/intr.h
> +++ b/sys/arch/amd64/include/intr.h
> @@ -222,7 +222,9 @@ void x86_ipi_handler(void);
>  void x86_setperf_ipi(struct cpu_info *);
>
>  extern void (*ipifunc[X86_NIPI])(struct cpu_info *);
> -#endif
> +
> +#define cpu_xcall_ipi(_ci) x86_send_ipi((_ci), X86_IPI_XCALL)
> +#endif /* MULTIPROCESSOR */
>
>  #endif /* !_LOCORE */
>
> diff --git a/sys/arch/amd64/include/intrdefs.h b/sys/arch/amd64/include/intrdefs.h
> index 048d0202b88..69238b46155 100644
> --- a/sys/arch/amd64/include/intrdefs.h
> +++ b/sys/arch/amd64/include/intrdefs.h
> @@ -85,8 +85,9 @@
>  #define X86_IPI_START_VMM		0x00000100
>  #define X86_IPI_STOP_VMM		0x00000200
>  #define X86_IPI_WBINVD			0x00000400
> +#define X86_IPI_XCALL			0x00000800
>
> -#define X86_NIPI			12
> +#define X86_NIPI			13
>
>  #define IREENT_MAGIC	0x18041969
>
> diff --git a/sys/conf/files b/sys/conf/files
> index 59df8c6fd1b..1e00c25cb6c 100644
> --- a/sys/conf/files
> +++ b/sys/conf/files
> @@ -751,7 +751,7 @@ file kern/kern_uuid.c
>  file kern/kern_watchdog.c		!small_kernel
>  file kern/kern_task.c
>  file kern/kern_srp.c
> -file kern/kern_xcall.c			xcall			needs-flag
> +file kern/kern_xcall.c			xcall
>  file kern/kern_xxx.c
>  file kern/sched_bsd.c
>  file kern/subr_autoconf.c