From: Martin Pieuchot
Subject: km_alloc(9) for malloc(9)
To: tech@openbsd.org
Date: Mon, 10 Nov 2025 17:26:37 +0000

Diff below gets rid of uvm_km_kmemalloc_pla() and instead uses km_alloc(9)
to allocate malloc(9) chunks.  This is the last piece of the conversion
started 14 years ago.

- The M_CANFAIL logic regarding available swap space has been preserved
  and moved into malloc(9) (see the caller sketch after the diff).

- Pages returned by uvm_pglistalloc(9) are now dequeued in km_alloc(9) to
  match the behavior of the old allocator.  This will also let us add
  checks to know whether pages are part of a list.

- I intentionally didn't bring back uvm_pagealloc_pg(); it is not useful
  without an `obj' or `anon' associated with the pages.

I'll take care of the man pages once this is in.

ok?

Index: kern/kern_malloc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_malloc.c,v
diff -u -p -r1.156 kern_malloc.c
--- kern/kern_malloc.c	10 Nov 2025 10:35:21 -0000	1.156
+++ kern/kern_malloc.c	10 Nov 2025 17:08:07 -0000
@@ -146,6 +146,7 @@ struct timeval malloc_lasterr;
 void *
 malloc(size_t size, int type, int flags)
 {
+	const struct kmem_dyn_mode *kdp;
 	struct kmembuckets *kbp;
 	struct kmemusage *kup;
 	struct kmem_freelist *freep;
@@ -216,10 +217,14 @@ malloc(size_t size, int type, int flags)
 	if (XSIMPLEQ_FIRST(&kbp->kb_freelist) == NULL) {
 		mtx_leave(&malloc_mtx);
 		npg = atop(round_page(allocsize));
+		KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
+		if ((flags & M_NOWAIT) || ((flags & M_CANFAIL) &&
+		    uvmexp.swpages - uvmexp.swpgonly <= npg))
+			kdp = &kd_nowait;
+		else
+			kdp = &kd_waitok;
 		s = splvm();
-		va = (caddr_t)uvm_km_kmemalloc_pla((vsize_t)ptoa(npg),
-		    flags & (M_NOWAIT|M_CANFAIL),
-		    no_constraint.ucr_low, no_constraint.ucr_high);
+		va = (caddr_t)km_alloc(ptoa(npg), &kv_intrsafe, &kp_dirty, kdp);
 		splx(s);
 		if (va == NULL) {
 			/*
@@ -425,7 +430,7 @@ free(void *addr, int type, size_t freeds
 		kup->ku_pagecnt = 0;
 		mtx_leave(&malloc_mtx);
 		s = splvm();
-		uvm_km_free((vaddr_t)addr, ptoa(pagecnt));
+		km_free(addr, ptoa(pagecnt), &kv_intrsafe, &kp_dirty);
 		splx(s);
 #ifdef KMEMSTATS
 		mtx_enter(&malloc_mtx);
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
diff -u -p -r1.186 uvm_extern.h
--- uvm/uvm_extern.h	10 Nov 2025 10:35:21 -0000	1.186
+++ uvm/uvm_extern.h	10 Nov 2025 16:28:48 -0000
@@ -277,8 +277,6 @@ int			uvm_io(vm_map_t, struct uio *, int
 
 #define	UVM_IO_FIXPROT	0x01
 
-void			uvm_km_free(vaddr_t, vsize_t);
-vaddr_t			uvm_km_kmemalloc_pla(vsize_t, int, paddr_t, paddr_t);
 struct vm_map		*uvm_km_suballoc(vm_map_t, vaddr_t *, vaddr_t *,
 			    vsize_t, int, boolean_t, vm_map_t);
 /*
Index: uvm/uvm_km.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_km.c,v
diff -u -p -r1.158 uvm_km.c
--- uvm/uvm_km.c	10 Nov 2025 10:35:21 -0000	1.158
+++ uvm/uvm_km.c	10 Nov 2025 16:50:15 -0000
@@ -312,86 +312,6 @@ uvm_km_pgremove_intrsafe(vaddr_t start,
 	pmap_kremove(start, end - start);
 }
 
-/*
- * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
- *
- * => we map wired memory into the kernel map
- * => NOTE: we can return NULL even if we can wait if there is not enough
- *	free VM space in the map... caller should be prepared to handle
- *	this case.
- * => we return KVA of memory allocated
- * => flags: M_NOWAIT, M_CANFAIL
- * => low, high, are the corresponding parameters to uvm_pglistalloc
- * => flags: ZERO - correspond to uvm_pglistalloc flags
- */
-vaddr_t
-uvm_km_kmemalloc_pla(vsize_t size, int flags, paddr_t low, paddr_t high)
-{
-	vaddr_t kva, loopva;
-	voff_t offset;
-	struct vm_page *pg;
-	struct pglist pgl;
-	int pla_flags = 0;
-
-	/* setup for call */
-	size = round_page(size);
-	kva = vm_map_min(kmem_map);	/* hint */
-
-	/* allocate some virtual space */
-	if (__predict_false(uvm_map(kmem_map, &kva, size, NULL,
-	    UVM_UNKNOWN_OFFSET, 0,
-	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
-	    MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) {
-		return 0;
-	}
-
-	/*
-	 * now allocate and map in the memory... note that we are the only ones
-	 * whom should ever get a handle on this area of VM.
-	 */
-	TAILQ_INIT(&pgl);
-	KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
-	if ((flags & M_NOWAIT) || ((flags & M_CANFAIL) &&
-	    uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
-		pla_flags |= UVM_PLA_NOWAIT;
-	else
-		pla_flags |= UVM_PLA_WAITOK;
-	if (uvm_pglistalloc(size, low, high, 0, 0, &pgl, atop(size),
-	    pla_flags) != 0) {
-		/* Failed. */
-		uvm_unmap(kmem_map, kva, kva + size);
-		return (0);
-	}
-
-	offset = 0;
-	loopva = kva;
-	while (loopva != kva + size) {
-		pg = TAILQ_FIRST(&pgl);
-		TAILQ_REMOVE(&pgl, pg, pageq);
-		uvm_pagealloc_pg(pg, NULL, offset, NULL);
-		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
-		UVM_PAGE_OWN(pg, NULL);
-
-		pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
-		    PROT_READ | PROT_WRITE);
-		loopva += PAGE_SIZE;
-		offset += PAGE_SIZE;
-	}
-	KASSERT(TAILQ_EMPTY(&pgl));
-	pmap_update(pmap_kernel());
-
-	return kva;
-}
-
-/*
- * uvm_km_free: free an area of kernel memory
- */
-void
-uvm_km_free(vaddr_t addr, vsize_t size)
-{
-	uvm_unmap(kmem_map, trunc_page(addr), round_page(addr+size));
-}
-
 #if defined(__HAVE_PMAP_DIRECT)
 /*
  * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
@@ -651,9 +571,10 @@ km_alloc(size_t sz, const struct kmem_va
 	 * allocations.
 	 */
 	if (kv->kv_singlepage || kp->kp_maxseg == 1) {
-		TAILQ_FOREACH(pg, &pgl, pageq) {
+		while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+			TAILQ_REMOVE(&pgl, pg, pageq);
 			va = pmap_map_direct(pg);
-			if (pg == TAILQ_FIRST(&pgl))
+			if (sva == 0)
 				sva = va;
 		}
 		return ((void *)sva);
@@ -716,7 +637,8 @@ try_map:
 		}
 	}
 	sva = va;
-	TAILQ_FOREACH(pg, &pgl, pageq) {
+	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+		TAILQ_REMOVE(&pgl, pg, pageq);
 		if (kp->kp_pageable)
 			pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
 			    prot, prot | PMAP_WIRED);
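
For reference, the M_CANFAIL behavior preserved above means callers must
still be ready for malloc(9) to return NULL, since the swap-space check can
turn a sleeping allocation into a non-sleeping one.  Minimal sketch only;
foo_alloc() and struct foo are made up for illustration and are not part of
this diff:

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/errno.h>

struct foo {				/* hypothetical structure */
	int	f_dummy;
};

int
foo_alloc(struct foo **fpp)
{
	struct foo *fp;

	/*
	 * With M_CANFAIL, malloc(9) may return NULL instead of sleeping
	 * or panicking, e.g. when little swap is left and the check in
	 * malloc() picks kd_nowait, so the error has to be handled.
	 */
	fp = malloc(sizeof(*fp), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
	if (fp == NULL)
		return (ENOMEM);

	*fpp = fp;
	return (0);
}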