From: Claudio Jeker Subject: Re: Faster _exit(2) for a faster userland: R.I.P the reaper To: Ted Unangst , dlg@openbsd.org, visa@openbsd.org, tech@openbsd.org, jan@openbsd.org, ajacoutot@openbsd.org Date: Mon, 5 May 2025 17:25:02 +0200 On Sun, May 04, 2025 at 08:38:06AM -0600, Theo de Raadt wrote: > Martin Pieuchot wrote: > > > > I do not understand any of the diff. It seems to be trying to return a > > > child to the parent, before resources are cleared. > > > > It doesn't. Order of operations didn't change. The KERNEL_LOCK() is > > there and asserted. > > I did not mention the KERNEL_LOCK(). > > > I disagree. I need to remove the reaper. That *is* the problem. > > You are using proc0 in an very dangerous way. > > > > I've called this uvmspace_purge(). It would be a bit like > > > > > > uvm_unmap_remove(p->p_vmspace, 0, VM_MAXUSER_ADDRESS, > > > &dead_entries, FALSE, TRUE, TRUE) > > > > > > Except there are clearly optimizations. The function can sleep. It is > > > called quite early in process context. > > > [...] > > > > There's no need to add a new function. Calling uvm_exit() from exit1() > > is enough and work but that's not buying us much. > > As proc0? Disagree strongly on this direction. The process should free > it's own pages out of it's own map. It can do that if uvm_exit() is split. > > But you've made up your mind to ignore everything everyone says. You don't > even reply to emails about the proposals. Your public requests for feedback > come off as fake. Here is a minimal diff implementing uvm_purge() in the dumbest way possible. It removes the reaper from the hot path without any scary hacks. It only fixes uvm_map_teardown() so it can be called again. I tried to zap the pmap first (with pmap_remove of the full user address space) but that is very slow. I think pmap can be optimized a lot here but we don't have an api for that yet. -- :wq Claudio Index: kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v diff -u -p -r1.245 kern_exit.c --- kern/kern_exit.c 2 May 2025 05:04:38 -0000 1.245 +++ kern/kern_exit.c 5 May 2025 15:18:17 -0000 @@ -235,6 +235,10 @@ exit1(struct proc *p, int xexit, int xsi free(pr->ps_libcpin.pn_pins, M_PINSYSCALL, pr->ps_libcpin.pn_npins * sizeof(u_int)); + KERNEL_UNLOCK(); + uvm_purge(pr); + KERNEL_LOCK(); + /* * If parent has the SAS_NOCLDWAIT flag set, we're not * going to become a zombie. Index: uvm/uvm_extern.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_extern.h,v diff -u -p -r1.180 uvm_extern.h --- uvm/uvm_extern.h 19 Nov 2024 06:18:26 -0000 1.180 +++ uvm/uvm_extern.h 5 May 2025 15:18:17 -0000 @@ -269,6 +269,7 @@ int uvm_fault(vm_map_t, vaddr_t, vm_fa vaddr_t uvm_uarea_alloc(void); void uvm_uarea_free(struct proc *); void uvm_exit(struct process *); +void uvm_purge(struct process *); void uvm_init_limits(struct plimit *); boolean_t uvm_kernacc(caddr_t, size_t, int); Index: uvm/uvm_glue.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_glue.c,v diff -u -p -r1.88 uvm_glue.c --- uvm/uvm_glue.c 21 Mar 2025 13:19:33 -0000 1.88 +++ uvm/uvm_glue.c 5 May 2025 15:18:17 -0000 @@ -297,6 +297,29 @@ uvm_exit(struct process *pr) uvmspace_free(vm); } +void +uvm_purge(struct process *pr) +{ + struct vmspace *vm = pr->ps_vmspace; + + /* only proc0 uses shared vm space */ + if (atomic_load_int(&vm->vm_refcnt) != 1) + return; + +#ifdef SYSVSHM + /* Get rid of any SYSV shared memory segments. */ + if (vm->vm_shm != NULL) { + KERNEL_LOCK(); + shmexit(vm); + KERNEL_UNLOCK(); + } +#endif + +// pmap_remove(vm->vm_map.pmap, 0, VM_MAXUSER_ADDRESS); + + uvm_map_teardown(&vm->vm_map); +} + /* * uvm_init_limit: init per-process VM limits * Index: uvm/uvm_map.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.c,v diff -u -p -r1.341 uvm_map.c --- uvm/uvm_map.c 21 Apr 2025 14:46:18 -0000 1.341 +++ uvm/uvm_map.c 5 May 2025 15:18:17 -0000 @@ -138,7 +138,6 @@ int uvm_map_pageable_wire(struct vm_m vaddr_t, vaddr_t, int); void uvm_map_setup_entries(struct vm_map*); void uvm_map_setup_md(struct vm_map*); -void uvm_map_teardown(struct vm_map*); void uvm_map_vmspace_update(struct vm_map*, struct uvm_map_deadq*, int); void uvm_map_kmem_grow(struct vm_map*, @@ -2490,6 +2489,7 @@ uvm_map_teardown(struct vm_map *map) /* Update wave-front. */ entry = TAILQ_NEXT(entry, dfree.deadq); } + RBT_INIT(uvm_map_addr, &map->addr); vm_map_unlock(map); @@ -2512,9 +2512,6 @@ uvm_map_teardown(struct vm_map *map) KASSERT(numt == numq); #endif uvm_unmap_detach(&dead_entries, 0); - - pmap_destroy(map->pmap); - map->pmap = NULL; } /* @@ -3417,6 +3414,9 @@ uvmspace_free(struct vmspace *vm) #endif uvm_map_teardown(&vm->vm_map); + + pmap_destroy(vm->vm_map.pmap); + pool_put(&uvm_vmspace_pool, vm); } } Index: uvm/uvm_map.h =================================================================== RCS file: /cvs/src/sys/uvm/uvm_map.h,v diff -u -p -r1.94 uvm_map.h --- uvm/uvm_map.h 15 Nov 2024 02:59:23 -0000 1.94 +++ uvm/uvm_map.h 5 May 2025 15:18:17 -0000 @@ -359,6 +359,7 @@ void uvm_unmap(struct vm_map *, vaddr_t void uvm_unmap_detach(struct uvm_map_deadq *, int); int uvm_unmap_remove(struct vm_map*, vaddr_t, vaddr_t, struct uvm_map_deadq *, boolean_t, boolean_t, boolean_t); +void uvm_map_teardown(struct vm_map*); void uvm_map_set_uaddr(struct vm_map*, struct uvm_addr_state**, struct uvm_addr_state*); int uvm_map_mquery(struct vm_map*, vaddr_t*, vsize_t, voff_t, int);