From: Hans-Jörg Höxer
Subject: amd64: wbinvd_on_all_cpus() with acknowledge
To:
Date: Mon, 4 Nov 2024 17:02:40 +0100

When deactivating a SEV-enabled guest, psp(4) needs to issue the
DF_FLUSH command, which flushes all data from the data fabric.  Before
invoking this command, we are required to execute wbinvd on all cores;
otherwise, DF_FLUSH fails with the error WBINVD_REQUIRED.

Up to now, we do this by calling wbinvd_on_all_cpus().  It uses a
broadcast IPI to trigger each core to execute the wbinvd instruction.
However, when wbinvd_on_all_cpus() returns, there is no guarantee that
the instruction has actually been executed on all cores.  And once in a
while I actually see DF_FLUSH failing with WBINVD_REQUIRED, which means
that on some core(s) wbinvd had not been executed yet.

To solve this issue, I implemented wbinvd_on_all_cpus_acked().  It is
modeled on pmap_tlb_shootpage() and only returns once every other core
has executed wbinvd.

amd64 pmap.c uses wbinvd_on_all_cpus() together with
pmap_tlb_shootpage(), so there wbinvd is guaranteed to have been
executed before the actual TLB shootdown is done.

The only other user of wbinvd_on_all_cpus() is pmap_flush_cache(),
where we might have the same problem.  I have not investigated further
yet, but we might want to use wbinvd_on_all_cpus_acked() there, too.

Take care,
HJ.

-------------------------------------------------------------------------

commit 978e18cf8bbbb245dcb5442b13cbcf2240d1c809
Author: Hans-Joerg Hoexer
Date:   Mon Nov 4 16:11:16 2024 +0100

    amd64: wbinvd_on_all_cpus() with acknowledge

    Implement wbinvd_on_all_cpus_acked() similar to pmap_tlb_shootpage().
    This ensures wbinvd has been executed on all cores when the function
    returns.

diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 42ea565a1f2..d7b46055814 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -1472,6 +1472,48 @@ wbinvd_on_all_cpus(void)
 	wbinvd();
 	return 0;
 }
+
+
+volatile long wbinvd_wait __attribute__((section(".kudata")));
+
+void
+wbinvd_on_all_cpus_acked(void)
+{
+	struct cpu_info *ci, *self = curcpu();
+	CPU_INFO_ITERATOR cii;
+	long wait = 0;
+	u_int64_t mask = 0;
+	int s;
+
+	CPU_INFO_FOREACH(cii, ci) {
+		if (ci == self)
+			continue;
+		mask |= (1ULL << ci->ci_cpuid);
+		wait++;
+	}
+
+	KASSERT(wait > 0);
+
+	s = splvm();
+	while (atomic_cas_ulong(&wbinvd_wait, 0, wait) != 0) {
+		while (wbinvd_wait != 0) {
+			CPU_BUSY_CYCLE();
+		}
+	}
+
+	CPU_INFO_FOREACH(cii, ci) {
+		if ((mask & (1ULL << ci->ci_cpuid)) == 0)
+			continue;
+		if (x86_fast_ipi(ci, LAPIC_IPI_WBINVD) != 0)
+			panic("%s: ipi failed", __func__);
+	}
+	splx(s);
+
+	wbinvd();
+
+	while (wbinvd_wait != 0)
+		CPU_BUSY_CYCLE();
+}
 #endif
 
 int cpu_suspended;
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index 14c62bfd7c3..3d487ed0afc 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -369,6 +369,8 @@ lapic_boot_init(paddr_t lapic_base)
 		idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_pcid);
 		idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_pcid);
 	}
+	idt_allocmap[LAPIC_IPI_WBINVD] = 1;
+	idt_vec_set(LAPIC_IPI_WBINVD, Xipi_wbinvd);
 #if NVMM > 0
 	idt_allocmap[LAPIC_IPI_INVEPT] = 1;
 	idt_vec_set(LAPIC_IPI_INVEPT, Xipi_invept);
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 3befb31efab..2a9c7ee17c1 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -775,6 +775,17 @@ IDTVEC(ipi_invlrange_pcid)
 	iretq
 END(Xipi_invlrange_pcid)
 
+IDTVEC(ipi_wbinvd)
+	ioapic_asm_ack()
+
+	wbinvd
+
+	lock
+	decq	wbinvd_wait
+
+	iretq
+END(Xipi_wbinvd)
+
 #endif /* MULTIPROCESSOR */
 
 /*
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 5d83e42ab6b..b000baf37fb 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -285,6 +285,7 @@ wbinvd(void)
 
 #ifdef MULTIPROCESSOR
 int wbinvd_on_all_cpus(void);
+void wbinvd_on_all_cpus_acked(void);
 #else
 static inline int
 wbinvd_on_all_cpus(void)
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index 4f32f9f95c9..d80af989b7c 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -72,7 +72,8 @@ extern void Xresume_lapic_ipi(void);
 #define LAPIC_IPI_INVLTLB	(LAPIC_IPI_OFFSET + 0)
 #define LAPIC_IPI_INVLPG	(LAPIC_IPI_OFFSET + 1)
 #define LAPIC_IPI_INVLRANGE	(LAPIC_IPI_OFFSET + 2)
-#define LAPIC_IPI_INVEPT	(LAPIC_IPI_OFFSET + 3)
+#define LAPIC_IPI_WBINVD	(LAPIC_IPI_OFFSET + 3)
+#define LAPIC_IPI_INVEPT	(LAPIC_IPI_OFFSET + 4)
 
 extern void Xipi_invltlb(void);
 extern void Xipi_invltlb_pcid(void);
@@ -80,6 +81,7 @@ extern void Xipi_invlpg(void);
 extern void Xipi_invlpg_pcid(void);
 extern void Xipi_invlrange(void);
 extern void Xipi_invlrange_pcid(void);
+extern void Xipi_wbinvd(void);
 #if NVMM > 0
 extern void Xipi_invept(void);
 #endif /* NVMM > 0 */
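
For reference, the caller side in psp(4) then only needs to swap one
call before issuing DF_FLUSH.  A rough sketch of what that could look
like; psp_df_flush(), psp_do_cmd() and PSP_CMD_DF_FLUSH are placeholder
names for the driver internals and are not part of the diff above:

/*
 * Hypothetical caller-side sketch, not part of the diff above.
 * Only wbinvd_on_all_cpus_acked() is real; the psp(4) helper and
 * command constant are stand-ins.
 */
int
psp_df_flush(struct psp_softc *sc)
{
	/*
	 * wbinvd_on_all_cpus() could return before every core had
	 * executed wbinvd, so the firmware could still reject the
	 * command with WBINVD_REQUIRED.  The acked variant spins
	 * until each core has decremented the shared wbinvd_wait
	 * counter in its IPI handler.
	 */
	wbinvd_on_all_cpus_acked();

	/* All caches have been written back; DF_FLUSH can proceed. */
	return (psp_do_cmd(sc, PSP_CMD_DF_FLUSH, NULL));
}

The synchronization itself follows the pmap TLB shootdown pattern: the
initiator claims the shared wbinvd_wait counter with
atomic_cas_ulong(), sends LAPIC_IPI_WBINVD to every other core,
executes wbinvd locally, and then busy-waits until each IPI handler has
done its wbinvd and decremented the counter back to zero.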