From: Martin Pieuchot <mpi@grenadille.net>
Subject: km_alloc(9) for malloc(9)
To: tech@openbsd.org
Date: Mon, 10 Nov 2025 17:26:37 +0000

The diff below gets rid of uvm_km_kmemalloc_pla() and instead uses
km_alloc(9) to allocate malloc(9) chunks.  This is the last piece of the
conversion started 14 years ago.
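
To make the change easier to read: where malloc(9) used to call
uvm_km_kmemalloc_pla() and free() used uvm_km_free(), both paths now go
through km_alloc(9)/km_free(9).  Roughly (excerpted from the diff, error
handling and locking elided):

	/* wired, intr-safe, non-zeroed pages for a bucket */
	va = (caddr_t)km_alloc(ptoa(npg), &kv_intrsafe, &kp_dirty, kdp);
	...
	/* and the matching free */
	km_free(addr, ptoa(pagecnt), &kv_intrsafe, &kp_dirty);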

- The M_CANFAIL logic regarding available swap space has been preserved
and moved into malloc(9); see the first snippet after this list.

- Pages returned by uvm_pglistalloc(9) are now dequeued in km_alloc(9)
to match the behavior of the old allocator (second snippet below).  This
will also let us add checks to know whether pages are part of a list.

- I intentionally didn't bring back uvm_pagealloc_pg(); it is not
useful without an `obj' or `anon' associated with the pages.
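
For reference, the two bits mentioned above, as they appear in the diff.
The swap-space check that picks the sleep policy now lives in malloc(9):

	KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
	if ((flags & M_NOWAIT) || ((flags & M_CANFAIL) &&
	    uvmexp.swpages - uvmexp.swpgonly <= npg))
		kdp = &kd_nowait;
	else
		kdp = &kd_waitok;

and km_alloc(9) now removes pages from the pglist as it maps them, like
the old allocator did:

	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
		TAILQ_REMOVE(&pgl, pg, pageq);
		...
	}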

I'll take care of the man pages once this is in.

ok?

Index: kern/kern_malloc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_malloc.c,v
diff -u -p -r1.156 kern_malloc.c
--- kern/kern_malloc.c	10 Nov 2025 10:35:21 -0000	1.156
+++ kern/kern_malloc.c	10 Nov 2025 17:08:07 -0000
@@ -146,6 +146,7 @@ struct timeval malloc_lasterr;
 void *
 malloc(size_t size, int type, int flags)
 {
+	const struct kmem_dyn_mode *kdp;
 	struct kmembuckets *kbp;
 	struct kmemusage *kup;
 	struct kmem_freelist *freep;
@@ -216,10 +217,14 @@ malloc(size_t size, int type, int flags)
 	if (XSIMPLEQ_FIRST(&kbp->kb_freelist) == NULL) {
 		mtx_leave(&malloc_mtx);
 		npg = atop(round_page(allocsize));
+		KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
+		if ((flags & M_NOWAIT) || ((flags & M_CANFAIL) &&
+		    uvmexp.swpages - uvmexp.swpgonly <= npg))
+			kdp = &kd_nowait;
+		else
+			kdp = &kd_waitok;
 		s = splvm();
-		va = (caddr_t)uvm_km_kmemalloc_pla((vsize_t)ptoa(npg),
-		    flags & (M_NOWAIT|M_CANFAIL),
-		    no_constraint.ucr_low, no_constraint.ucr_high);
+		va = (caddr_t)km_alloc(ptoa(npg), &kv_intrsafe, &kp_dirty, kdp);
 		splx(s);
 		if (va == NULL) {
 			/*
@@ -425,7 +430,7 @@ free(void *addr, int type, size_t freeds
 		kup->ku_pagecnt = 0;
 		mtx_leave(&malloc_mtx);
 		s = splvm();
-		uvm_km_free((vaddr_t)addr, ptoa(pagecnt));
+		km_free(addr, ptoa(pagecnt), &kv_intrsafe, &kp_dirty);
 		splx(s);
 #ifdef KMEMSTATS
 		mtx_enter(&malloc_mtx);
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
diff -u -p -r1.186 uvm_extern.h
--- uvm/uvm_extern.h	10 Nov 2025 10:35:21 -0000	1.186
+++ uvm/uvm_extern.h	10 Nov 2025 16:28:48 -0000
@@ -277,8 +277,6 @@ int			uvm_io(vm_map_t, struct uio *, int
 
 #define	UVM_IO_FIXPROT	0x01
 
-void			uvm_km_free(vaddr_t, vsize_t);
-vaddr_t			uvm_km_kmemalloc_pla(vsize_t, int, paddr_t, paddr_t);
 struct vm_map		*uvm_km_suballoc(vm_map_t, vaddr_t *, vaddr_t *,
 			    vsize_t, int, boolean_t, vm_map_t);
 /*
Index: uvm/uvm_km.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_km.c,v
diff -u -p -r1.158 uvm_km.c
--- uvm/uvm_km.c	10 Nov 2025 10:35:21 -0000	1.158
+++ uvm/uvm_km.c	10 Nov 2025 16:50:15 -0000
@@ -312,86 +312,6 @@ uvm_km_pgremove_intrsafe(vaddr_t start, 
 	pmap_kremove(start, end - start);
 }
 
-/*
- * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
- *
- * => we map wired memory into the kernel map
- * => NOTE: we can return NULL even if we can wait if there is not enough
- *	free VM space in the map... caller should be prepared to handle
- *	this case.
- * => we return KVA of memory allocated
- * => flags: M_NOWAIT, M_CANFAIL
- * => low, high, are the corresponding parameters to uvm_pglistalloc
- * => flags: ZERO - correspond to uvm_pglistalloc flags
- */
-vaddr_t
-uvm_km_kmemalloc_pla(vsize_t size, int flags, paddr_t low, paddr_t high)
-{
-	vaddr_t kva, loopva;
-	voff_t offset;
-	struct vm_page *pg;
-	struct pglist pgl;
-	int pla_flags = 0;
-
-	/* setup for call */
-	size = round_page(size);
-	kva = vm_map_min(kmem_map);	/* hint */
-
-	/* allocate some virtual space */
-	if (__predict_false(uvm_map(kmem_map, &kva, size, NULL,
-	    UVM_UNKNOWN_OFFSET, 0,
-	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
-	    MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) {
-		return 0;
-	}
-
-	/*
-	 * now allocate and map in the memory... note that we are the only ones
-	 * whom should ever get a handle on this area of VM.
-	 */
-	TAILQ_INIT(&pgl);
-	KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
-	if ((flags & M_NOWAIT) || ((flags & M_CANFAIL) &&
-	    uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
-		pla_flags |= UVM_PLA_NOWAIT;
-	else
-		pla_flags |= UVM_PLA_WAITOK;
-	if (uvm_pglistalloc(size, low, high, 0, 0, &pgl, atop(size),
-	    pla_flags) != 0) {
-		/* Failed. */
-		uvm_unmap(kmem_map, kva, kva + size);
-		return (0);
-	}
-
-	offset = 0;
-	loopva = kva;
-	while (loopva != kva + size) {
-		pg = TAILQ_FIRST(&pgl);
-		TAILQ_REMOVE(&pgl, pg, pageq);
-		uvm_pagealloc_pg(pg, NULL, offset, NULL);
-		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
-		UVM_PAGE_OWN(pg, NULL);
-
-		pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
-		    PROT_READ | PROT_WRITE);
-		loopva += PAGE_SIZE;
-		offset += PAGE_SIZE;
-	}
-	KASSERT(TAILQ_EMPTY(&pgl));
-	pmap_update(pmap_kernel());
-
-	return kva;
-}
-
-/*
- * uvm_km_free: free an area of kernel memory
- */
-void
-uvm_km_free(vaddr_t addr, vsize_t size)
-{
-	uvm_unmap(kmem_map, trunc_page(addr), round_page(addr+size));
-}
-
 #if defined(__HAVE_PMAP_DIRECT)
 /*
  * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
@@ -651,9 +571,10 @@ km_alloc(size_t sz, const struct kmem_va
 	 * allocations.
 	 */
 	if (kv->kv_singlepage || kp->kp_maxseg == 1) {
-		TAILQ_FOREACH(pg, &pgl, pageq) {
+		while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+			TAILQ_REMOVE(&pgl, pg, pageq);
 			va = pmap_map_direct(pg);
-			if (pg == TAILQ_FIRST(&pgl))
+			if (sva == 0)
 				sva = va;
 		}
 		return ((void *)sva);
@@ -716,7 +637,8 @@ try_map:
 		}
 	}
 	sva = va;
-	TAILQ_FOREACH(pg, &pgl, pageq) {
+	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+		TAILQ_REMOVE(&pgl, pg, pageq);
 		if (kp->kp_pageable)
 			pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
 			    prot, prot | PMAP_WIRED);