From: Mark Kettenis
Subject: Re: Fewer pages used for mmap descriptors
To: Mateusz Guzik
Cc: mpi@grenadille.net, tech@openbsd.org
Date: Sat, 27 Sep 2025 14:38:51 +0200

> Date: Sat, 27 Sep 2025 13:55:15 +0200
> From: Mateusz Guzik
>
> On Fri, Sep 26, 2025 at 10:21:26AM +0000, Martin Pieuchot wrote:
> > The kernel currently allocates a "mmap descriptor" (AKA UVM vnode
> > descriptor) for every vnode.  This wastes a lot of memory.
> >
> > Diff below moves the allocation of such descriptors into uvn_attach()
> > and greatly reduces the waste.  Note that such descriptors are, like
> > vnodes, never freed.  Nothing is changed in that regard.
> >
> > On a small VM with 128M of RAM the difference after boot is huge:
> >
> > before:
> > Name         Size Requests Fail InUse Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle
> > uvmvnodes      80     1533    0  1533    32     0    32    32     0     8    0
> >
> > after:
> > Name         Size Requests Fail InUse Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle
> > uvmvnodes      80       74    0    74     2     0     2     2     0     8    0
> >
>
> Do I read correctly that the association between a vnode and "mmap
> descriptor" is constant after it gets established?
>
> I presume the saving is coming from not allocating the object for
> directories.  It is also not allocated for files that are never mmaped.
> Since getnewvnode() is not told what the allocated vnode is going to be,
> I expect the total count of uvmvnodes will be creeping up as vnodes get
> recycled and have their type changed VDIR<->VREG.

Not exactly, but yes, I expect that if you leave a machine up long
enough, the number of uvmvnodes will creep up towards the number of
vnodes.

> Assuming the association has to stay constant, I suspect the idea can
> be greatly helped by telling getnewvnode() what is being allocated.
> Then it can look at the first n vnodes on the LRU in hopes of recycling
> a matching type (and preventing/reusing an allocation respectively),
> instead of just grabbing the first one it sees.
>
> By matching type I mean known to use uvmvnodes vs not.
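A minimal sketch of what that type-aware recycling could look like.
This is hypothetical only: vn_pick_recyclable(), the wants_uvn hint and
VN_SCAN_MAX do not exist in the tree, and getnewvnode() takes no such
hint today; it merely assumes the struct freelst head and v_freelist
entry that getnewvnode() already iterates.

#define VN_SCAN_MAX	8	/* "first n vnodes" to inspect */

/*
 * Scan a short window of the free list for a vnode whose existing
 * v_uvm state matches what the caller is about to create, instead
 * of just grabbing the head.
 */
struct vnode *
vn_pick_recyclable(struct freelst *listhd, int wants_uvn)
{
	struct vnode *vp;
	int n = 0;

	TAILQ_FOREACH(vp, listhd, v_freelist) {
		if (n++ >= VN_SCAN_MAX)
			break;
		/*
		 * A vnode that already carries a uvm_vnode descriptor
		 * matches a future VREG vnode; one without matches a
		 * future VDIR vnode, so no descriptor gets wasted.
		 */
		if ((vp->v_uvm != NULL) == (wants_uvn != 0))
			return vp;
	}
	/* No match in the window: fall back to the head, as today. */
	return TAILQ_FIRST(listhd);
}

Combined with the lazy allocation in the diff below, a caller that
picks a matching vnode either reuses the descriptor already hanging
off it, or avoids ever attaching one to a directory.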
> >
> > Index: kern/exec_subr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/exec_subr.c,v
> > diff -u -p -r1.70 exec_subr.c
> > --- kern/exec_subr.c	20 Sep 2025 13:53:36 -0000	1.70
> > +++ kern/exec_subr.c	26 Sep 2025 09:12:28 -0000
> > @@ -40,7 +40,7 @@
> >  #include 
> >  #include 
> >  
> > -#include 
> > +#include 
> >  
> >  /*
> >   * new_vmcmd():
> > Index: kern/vfs_subr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/vfs_subr.c,v
> > diff -u -p -r1.332 vfs_subr.c
> > --- kern/vfs_subr.c	25 Sep 2025 09:05:47 -0000	1.332
> > +++ kern/vfs_subr.c	26 Sep 2025 09:25:53 -0000
> > @@ -420,7 +420,6 @@ getnewvnode(enum vtagtype tag, struct mo
> >  	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
> >  		splx(s);
> >  		vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
> > -		uvm_vnp_obj_alloc(vp);
> >  		RBT_INIT(buf_rb_bufs, &vp->v_bufs_tree);
> >  		cache_tree_init(&vp->v_nc_tree);
> >  		TAILQ_INIT(&vp->v_cache_dst);
> > Index: uvm/uvm_extern.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> > diff -u -p -r1.184 uvm_extern.h
> > --- uvm/uvm_extern.h	3 Jun 2025 08:38:17 -0000	1.184
> > +++ uvm/uvm_extern.h	26 Sep 2025 09:12:03 -0000
> > @@ -443,9 +443,6 @@ void	kmeminit_nkmempages(void);
> >  void		kmeminit(void);
> >  extern u_int	nkmempages;
> >  
> > -struct vnode;
> > -struct uvm_object	*uvn_attach(struct vnode *, vm_prot_t);
> > -
> >  struct process;
> >  struct kinfo_vmentry;
> >  int		fill_vmmap(struct process *, struct kinfo_vmentry *,
> > Index: uvm/uvm_vnode.c
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> > diff -u -p -r1.141 uvm_vnode.c
> > --- uvm/uvm_vnode.c	25 Sep 2025 09:05:47 -0000	1.141
> > +++ uvm/uvm_vnode.c	26 Sep 2025 10:18:28 -0000
> > @@ -137,7 +137,7 @@ uvn_init(void)
> >  struct uvm_object *
> >  uvn_attach(struct vnode *vp, vm_prot_t accessprot)
> >  {
> > -	struct uvm_vnode *uvn = vp->v_uvm;
> > +	struct uvm_vnode *uvn;
> >  	struct vattr vattr;
> >  	int oldflags, result;
> >  	struct partinfo pi;
> > @@ -148,7 +148,18 @@ uvn_attach(struct vnode *vp, vm_prot_t a
> >  		return NULL;
> >  	}
> >  
> > -	/* first get a lock on the uvn. */
> > +	if (vp->v_uvm == NULL) {
> > +		uvn = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
> > +		KERNEL_ASSERT_LOCKED();
> > +		if (vp->v_uvm == NULL) {
> > +			uvm_obj_init(&uvn->u_obj, &uvm_vnodeops, 0);
> > +			uvn->u_vnode = vp;
> > +			vp->v_uvm = uvn;
> > +		} else
> > +			pool_put(&uvm_vnode_pool, uvn);
> > +	}
> > +
> > +	uvn = vp->v_uvm;
> >  	rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
> >  	while (uvn->u_flags & UVM_VNODE_BLOCKED) {
> >  		uvn->u_flags |= UVM_VNODE_WANTED;
> > @@ -423,10 +434,12 @@ void
> >  uvm_vnp_terminate(struct vnode *vp)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > +	struct uvm_object *uobj;
> >  	int oldflags;
> >  
> > -	/* check if it is valid */
> > +	if (uvn == NULL)
> > +		return;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> >  	if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
> >  		rw_exit(uobj->vmobjlock);
> > @@ -1357,13 +1370,14 @@ int
> >  uvm_vnp_uncache(struct vnode *vp)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > -
> > -	/* lock uvn part of the vnode and check if we need to do anything */
> > +	struct uvm_object *uobj;
> >  
> > +	if (uvn == NULL)
> > +		return TRUE;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> >  	if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
> > -	    (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
> > +	    (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
> >  		rw_exit(uobj->vmobjlock);
> >  		return TRUE;
> >  	}
> > @@ -1436,13 +1441,13 @@ void
> >  uvm_vnp_setsize(struct vnode *vp, off_t newsize)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > +	struct uvm_object *uobj;
> >  
> >  	KERNEL_ASSERT_LOCKED();
> > -
> > +	if (uvn == NULL)
> > +		return;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> > -
> > -	/* lock uvn and check for valid object, and if valid: do it! */
> >  	if (uvn->u_flags & UVM_VNODE_VALID) {
> >  
> >  		/*
> > @@ -1541,17 +1546,4 @@ uvm_vnp_sync(struct mount *mp)
> >  	}
> >  
> >  	rw_exit_write(&uvn_sync_lock);
> > -}
> > -
> > -void
> > -uvm_vnp_obj_alloc(struct vnode *vp)
> > -{
> > -	struct uvm_vnode *uvn;
> > -
> > -	KASSERT(vp->v_uvm == NULL);
> > -
> > -	uvn = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
> > -	uvm_obj_init(&uvn->u_obj, &uvm_vnodeops, 0);
> > -	uvn->u_vnode = vp;
> > -	vp->v_uvm = uvn;
> >  }
> > Index: uvm/uvm_vnode.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
> > diff -u -p -r1.22 uvm_vnode.h
> > --- uvm/uvm_vnode.h	25 Sep 2025 09:05:47 -0000	1.22
> > +++ uvm/uvm_vnode.h	26 Sep 2025 09:25:00 -0000
> > @@ -94,7 +94,7 @@ struct uvm_vnode {
> >   */
> >  #define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL)
> >  
> > -void	uvm_vnp_obj_alloc(struct vnode *);
> > +struct uvm_object *uvn_attach(struct vnode *, vm_prot_t);
> >  void	uvm_vnp_terminate(struct vnode *);
> >  
> >  #endif /* _UVM_UVM_VNODE_H_ */
> >
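For reference, the new code in the uvn_attach() hunk is the usual
allocate-then-recheck shape: presumably pool_get(9) with PR_WAITOK may
sleep, and the kernel lock does not hold across a sleep, so v_uvm has
to be tested again once the allocation returns.  The same lines with
the reasoning spelled out as comments (a restatement of the diff
above, not further code):

	if (vp->v_uvm == NULL) {
		/* May sleep; the kernel lock is dropped while asleep. */
		uvn = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
		KERNEL_ASSERT_LOCKED();
		if (vp->v_uvm == NULL) {
			/* Nobody raced us during the sleep: install ours. */
			uvm_obj_init(&uvn->u_obj, &uvm_vnodeops, 0);
			uvn->u_vnode = vp;
			vp->v_uvm = uvn;
		} else
			pool_put(&uvm_vnode_pool, uvn); /* lost the race */
	}

	/* Either way a descriptor is attached now. */
	uvn = vp->v_uvm;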