From: Martin Pieuchot Subject: amd64: prefer enhanced REP MOVSB/STOSB feature if available To: tech@openbsd.org Date: Mon, 22 Dec 2025 13:23:18 +0100 As Mateusz Guzik pointed out recently [0], we can greatly reduce the number of CPU cycles spent zeroing pages by using 'rep stosb'. Diff below does that, ok? [0] https://marc.info/?l=openbsd-tech&m=176631121132731&w=2 Index: arch/amd64/amd64/locore.S =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v diff -u -p -r1.151 locore.S --- arch/amd64/amd64/locore.S 2 Aug 2025 07:33:28 -0000 1.151 +++ arch/amd64/amd64/locore.S 22 Dec 2025 11:54:32 -0000 @@ -1172,6 +1172,16 @@ ENTRY(pagezero) lfence END(pagezero) +ENTRY(pagezero_erms) + RETGUARD_SETUP(pagezero_erms, r11) + movq $PAGE_SIZE,%rcx + xorq %rax,%rax + rep stosb + RETGUARD_CHECK(pagezero_erms, r11) + ret + lfence +END(pagezero_erms) + /* void pku_xonly(void) */ ENTRY(pku_xonly) movq pg_xo,%rax /* have PKU support? */ Index: arch/amd64/amd64/pmap.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v diff -u -p -r1.182 pmap.c --- arch/amd64/amd64/pmap.c 15 Aug 2025 13:40:43 -0000 1.182 +++ arch/amd64/amd64/pmap.c 22 Dec 2025 11:55:07 -0000 @@ -1594,11 +1594,14 @@ pmap_extract(struct pmap *pmap, vaddr_t /* * pmap_zero_page: zero a page */ - void pmap_zero_page(struct vm_page *pg) { - pagezero(pmap_map_direct(pg)); + /* Prefer enhanced REP MOVSB/STOSB feature if available. 
*/ + if (ISSET(curcpu()->ci_feature_sefflags_ebx, SEFF0EBX_ERMS)) + pagezero_erms(pmap_map_direct(pg)); + else + pagezero(pmap_map_direct(pg)); } /* Index: arch/amd64/include/pmap.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/pmap.h,v diff -u -p -r1.94 pmap.h --- arch/amd64/include/pmap.h 7 Jul 2025 00:55:15 -0000 1.94 +++ arch/amd64/include/pmap.h 22 Dec 2025 11:46:09 -0000 @@ -403,6 +403,7 @@ void pmap_write_protect(struct pmap *, paddr_t pmap_prealloc_lowmem_ptps(paddr_t); void pagezero(vaddr_t); +void pagezero_erms(vaddr_t); void pmap_convert(struct pmap *, int); void pmap_enter_special(vaddr_t, paddr_t, vm_prot_t);