Index | Thread | Search

From:
Hans-Jörg Höxer <hshoexer@genua.de>
Subject:
amd64: wbinvd_on_all_cpus() with acknowledge
To:
<tech@openbsd.org>
Date:
Mon, 4 Nov 2024 17:02:40 +0100

Download raw body.

Thread
  • Hans-Jörg Höxer:

    amd64: wbinvd_on_all_cpus() with acknowledge

When deactivating a SEV enabled guest, psp(4) needs to issue the DF_FLUSH
command.  This flushes all data from the data fabric.  Before invoking
this command, we are required to execute wbinvd on all cores.  Otherwise,
DF_FLUSH will fail with the error WBVIND_REQUIRED.

Up to now, we do this be calling wbinvd_on_all_cpus().  It uses a
broadcast IPI to trigger each core to execute the wbinvd instruction.
However, we do not have any guarantee that the instruction was actually
executed on all cores, when wbinvd_on_all_cpus() returns.  And once in a
while I actually see DF_FLUSH failing with WBVIND_REQUIRED.  This means,
on some core(s) wbinvd was not executed yet.
    
To solve this issue, I implemented wbinvd_on_all_cpus_acked().  It is
similar to pmap_tlb_shootpage().

amd64 pmap.c uses wbinvd_on_all_cpus().  There it is used together
with pmap_tlb_shootpage().  So there wbinvd is guaranteed to have been
executed before doing the actual TLB shoot down.

The only other user ov wbinvd_on_all_cpus() is pmap_flush_cache().
There we might have the same problem.  I have not investegated further,
yet.  But we might want to use wbinvd_on_all_cpus_acked() there, too.

Take care,
HJ.
-------------------------------------------------------------------------
commit 978e18cf8bbbb245dcb5442b13cbcf2240d1c809
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date:   Mon Nov 4 16:11:16 2024 +0100

    amd64: wbinvd_on_all_cpus() with acknowledge
    
    Implement wbinvd_on_all_cpus_acked() similar to pmap_tlb_shootpage().
    This ensures, wbinvd has been executed on all cores when the function
    returns.

diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 42ea565a1f2..d7b46055814 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -1472,6 +1472,48 @@ wbinvd_on_all_cpus(void)
 	wbinvd();
 	return 0;
 }
+
+
+volatile long wbinvd_wait __attribute__((section(".kudata")));
+
+void
+wbinvd_on_all_cpus_acked(void)
+{
+	struct cpu_info *ci, *self = curcpu();;
+	CPU_INFO_ITERATOR cii;
+	long wait = 0;
+	u_int64_t mask = 0;
+	int s;
+
+	CPU_INFO_FOREACH(cii, ci) {
+		if (ci == self)
+			continue;
+		mask |= (1ULL << ci->ci_cpuid);
+		wait++;
+	}
+
+	KASSERT(wait > 0);
+
+	s = splvm();
+	while (atomic_cas_ulong(&wbinvd_wait, 0 , wait) != 0) {
+		while (wbinvd_wait != 0) {
+			CPU_BUSY_CYCLE();
+		}
+	}
+
+	CPU_INFO_FOREACH(cii, ci) {
+		if ((mask & (1ULL << ci->ci_cpuid)) == 0)
+			continue;
+		if (x86_fast_ipi(ci, LAPIC_IPI_WBINVD) != 0)
+			panic("%s: ipi failed", __func__);
+	}
+	splx(s);
+
+	wbinvd();
+
+	while (wbinvd_wait != 0)
+		CPU_BUSY_CYCLE();
+}
 #endif
 
 int cpu_suspended;
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index 14c62bfd7c3..3d487ed0afc 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -369,6 +369,8 @@ lapic_boot_init(paddr_t lapic_base)
 		idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_pcid);
 		idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_pcid);
 	}
+	idt_allocmap[LAPIC_IPI_WBINVD] = 1;
+	idt_vec_set(LAPIC_IPI_WBINVD, Xipi_wbinvd);
 #if NVMM > 0
 	idt_allocmap[LAPIC_IPI_INVEPT] = 1;
 	idt_vec_set(LAPIC_IPI_INVEPT, Xipi_invept);
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 3befb31efab..2a9c7ee17c1 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -775,6 +775,17 @@ IDTVEC(ipi_invlrange_pcid)
 	iretq
 END(Xipi_invlrange_pcid)
 
+IDTVEC(ipi_wbinvd)
+	ioapic_asm_ack()
+
+	wbinvd
+
+	lock
+	decq	wbinvd_wait
+
+	iretq
+END(Xipi_wbinvd)
+
 #endif /* MULTIPROCESSOR */
 
 	/*
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 5d83e42ab6b..b000baf37fb 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -285,6 +285,7 @@ wbinvd(void)
 
 #ifdef MULTIPROCESSOR
 int wbinvd_on_all_cpus(void);
+void wbinvd_on_all_cpus_acked(void);
 #else
 static inline int
 wbinvd_on_all_cpus(void)
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index 4f32f9f95c9..d80af989b7c 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -72,7 +72,8 @@ extern void Xresume_lapic_ipi(void);
 #define LAPIC_IPI_INVLTLB			(LAPIC_IPI_OFFSET + 0)
 #define LAPIC_IPI_INVLPG			(LAPIC_IPI_OFFSET + 1)
 #define LAPIC_IPI_INVLRANGE			(LAPIC_IPI_OFFSET + 2)
-#define LAPIC_IPI_INVEPT			(LAPIC_IPI_OFFSET + 3)
+#define LAPIC_IPI_WBINVD			(LAPIC_IPI_OFFSET + 3)
+#define LAPIC_IPI_INVEPT			(LAPIC_IPI_OFFSET + 4)
 
 extern void Xipi_invltlb(void);
 extern void Xipi_invltlb_pcid(void);
@@ -80,6 +81,7 @@ extern void Xipi_invlpg(void);
 extern void Xipi_invlpg_pcid(void);
 extern void Xipi_invlrange(void);
 extern void Xipi_invlrange_pcid(void);
+extern void Xipi_wbinvd(void);
 #if NVMM > 0
 extern void Xipi_invept(void);
 #endif /* NVMM > 0 */