Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
amd64: prefer enhanced REP MOVSB/STOSB feature if available
To:
tech@openbsd.org
Date:
Mon, 22 Dec 2025 13:23:18 +0100

Download raw body.

Thread
As Mateusz Guzik pointed out recently [0], we can greatly reduce the
number of CPU cycles spent zeroing pages by using 'rep stosb'.

The diff below does that on CPUs that advertise the ERMS feature; ok?

[0] https://marc.info/?l=openbsd-tech&m=176631121132731&w=2

Index: arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
diff -u -p -r1.151 locore.S
--- arch/amd64/amd64/locore.S	2 Aug 2025 07:33:28 -0000	1.151
+++ arch/amd64/amd64/locore.S	22 Dec 2025 11:54:32 -0000
@@ -1172,6 +1172,16 @@ ENTRY(pagezero)
 	lfence
 END(pagezero)
 
+ENTRY(pagezero_erms)	/* void pagezero_erms(vaddr_t): zero PAGE_SIZE bytes */
+	RETGUARD_SETUP(pagezero_erms, r11)
+	movq    $PAGE_SIZE,%rcx	/* rep count: one page, in bytes */
+	xorq    %rax,%rax	/* %al = 0, the byte stosb stores */
+	rep stosb		/* store %al at (%rdi), %rcx times; assumes DF clear */
+	RETGUARD_CHECK(pagezero_erms, r11)
+	ret
+	lfence			/* pagezero also ends with an lfence */
+END(pagezero_erms)
+
 /* void pku_xonly(void) */
 ENTRY(pku_xonly)
 	movq	pg_xo,%rax	/* have PKU support? */
Index: arch/amd64/amd64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v
diff -u -p -r1.182 pmap.c
--- arch/amd64/amd64/pmap.c	15 Aug 2025 13:40:43 -0000	1.182
+++ arch/amd64/amd64/pmap.c	22 Dec 2025 11:55:07 -0000
@@ -1594,11 +1594,14 @@ pmap_extract(struct pmap *pmap, vaddr_t 
 /*
  * pmap_zero_page: zero a page
  */
-
 void
 pmap_zero_page(struct vm_page *pg)
 {
-	pagezero(pmap_map_direct(pg));
+	/* Use 'rep stosb' if enhanced REP MOVSB/STOSB (ERMS) is available. */
+	if (ISSET(curcpu()->ci_feature_sefflags_ebx, SEFF0EBX_ERMS))
+		pagezero_erms(pmap_map_direct(pg));
+	else
+		pagezero(pmap_map_direct(pg));	/* no ERMS: existing routine */
 }
 
 /*
Index: arch/amd64/include/pmap.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pmap.h,v
diff -u -p -r1.94 pmap.h
--- arch/amd64/include/pmap.h	7 Jul 2025 00:55:15 -0000	1.94
+++ arch/amd64/include/pmap.h	22 Dec 2025 11:46:09 -0000
@@ -403,6 +403,7 @@ void		pmap_write_protect(struct pmap *, 
 paddr_t	pmap_prealloc_lowmem_ptps(paddr_t);
 
 void	pagezero(vaddr_t);
+void	pagezero_erms(vaddr_t);	/* 'rep stosb' variant of pagezero */
 
 void	pmap_convert(struct pmap *, int);
 void	pmap_enter_special(vaddr_t, paddr_t, vm_prot_t);