From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: Fewer pages used for mmap descriptors
To: Mateusz Guzik <mjguzik@gmail.com>
Cc: mpi@grenadille.net, tech@openbsd.org
Date: Sat, 27 Sep 2025 14:38:51 +0200

> Date: Sat, 27 Sep 2025 13:55:15 +0200
> From: Mateusz Guzik <mjguzik@gmail.com>
> 
> On Fri, Sep 26, 2025 at 10:21:26AM +0000, Martin Pieuchot wrote:
> > The kernel currently allocates a "mmap descriptor" (AKA UVM vnode
> > descriptor) for every vnode.  This wastes a lot of memory.
> > 
> > Diff below moves the allocation of such descriptors into uvn_attach()
> > and greatly reduces the waste.  Note that such descriptors are, like
> > vnodes, never freed.  Nothing is changed in that regard.
> > 
> > On a small VM with 128M of RAM the difference after boot is huge:
> > 
> > before:
> > Name        Size Requests Fail    InUse Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle
> > uvmvnodes     80     1533    0     1533    32     0    32    32     0     8    0
> > 
> > after:
> > Name        Size Requests Fail    InUse Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle
> > uvmvnodes     80       74    0       74     2     0     2     2     0     8    0
> > 
> 
> Do I read correctly that the association between a vnode and "mmap
> descriptor" is constant after it gets established?
> 
> I presume the saving comes from not allocating the object for
> directories.

It is also not allocated for files that are never mmapped.
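
To illustrate: the diff below makes uvn_attach() allocate the
descriptor on first use.  pool_get() with PR_WAITOK may sleep, so the
code allocates first and then re-checks vp->v_uvm before publishing;
the loser of the race frees its copy.  A standalone sketch of that
shape (simplified stand-in types, and calloc/free standing in for the
pool allocator; not the actual kernel code):

#include <stdlib.h>

/* Simplified stand-ins for the kernel structures (sketch only). */
struct vnode;

struct uvm_vnode {
	struct vnode	*u_vnode;	/* back pointer to the vnode */
};

struct vnode {
	struct uvm_vnode *v_uvm;	/* NULL until first attach */
};

struct uvm_vnode *
lazy_attach(struct vnode *vp)
{
	struct uvm_vnode *uvn;

	if (vp->v_uvm == NULL) {
		/* Allocation may sleep; another thread can attach first. */
		uvn = calloc(1, sizeof(*uvn));
		if (uvn == NULL)
			return NULL;	/* pool_get(PR_WAITOK) cannot fail */
		/* The real code asserts the kernel lock is held here. */
		if (vp->v_uvm == NULL) {
			uvn->u_vnode = vp;
			vp->v_uvm = uvn;
		} else
			free(uvn);	/* lost the race; discard our copy */
	}
	return vp->v_uvm;
}

Directories and never-mmapped files simply never reach this path, so
they never pay for a descriptor.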

> Since getnewvnode() is not told what the allocated vnode is going to
> be, I expect the total count of uvmvnodes will keep creeping up as
> vnodes get recycled and have their type changed VDIR<->VREG.

Not exactly, but yes, I expect that if you leave a machine up long
enough, the number of uvmvnodes will creep up towards the number of
vnodes.

> Assuming the association has to stay constant, I suspect the idea can
> be greatly helped by telling getnewvnode() what is being allocated.
> Then it can look at the first n vnodes on the LRU in hopes of
> recycling a matching type (and preventing/reusing an allocation
> respectively), instead of just grabbing the first one it sees.
> 
> By matching type I mean vnodes known to use uvmvnodes vs. those that
> do not.
> 
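
For what it's worth, a rough standalone sketch of that scan
(hypothetical names, no locking; nothing like this exists in the tree
today): getnewvnode() would receive a hint saying whether the new
vnode's type will use a uvm_vnode, walk the first few entries of the
free list, and prefer a vnode whose existing descriptor state matches.

#include <stddef.h>
#include <sys/queue.h>

/* Sketch only: hypothetical structure layout, not OpenBSD code. */
struct vnode {
	TAILQ_ENTRY(vnode)	 v_freelist;
	void			*v_uvm;		/* mmap descriptor, or NULL */
};

TAILQ_HEAD(freelst, vnode);

#define RECYCLE_SCAN_MAX	8	/* only look at the first n entries */

/*
 * "wants_uvn" is the hypothetical hint from the caller: nonzero if
 * the vnode being allocated is of a type that will use a uvm_vnode
 * (e.g. VREG), zero otherwise (e.g. VDIR).  Preferring a free vnode
 * whose descriptor state already matches lets an existing descriptor
 * be reused, or avoids allocating one at all.
 */
struct vnode *
pick_recycle_vnode(struct freelst *head, int wants_uvn)
{
	struct vnode *vp;
	int n = 0;

	TAILQ_FOREACH(vp, head, v_freelist) {
		if (n++ >= RECYCLE_SCAN_MAX)
			break;
		if ((vp->v_uvm != NULL) == (wants_uvn != 0))
			return vp;	/* matching type within the window */
	}
	/* No match in the first n entries: fall back to the LRU head. */
	return TAILQ_FIRST(head);
}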
> > 
> > Index: kern/exec_subr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/exec_subr.c,v
> > diff -u -p -r1.70 exec_subr.c
> > --- kern/exec_subr.c	20 Sep 2025 13:53:36 -0000	1.70
> > +++ kern/exec_subr.c	26 Sep 2025 09:12:28 -0000
> > @@ -40,7 +40,7 @@
> >  #include <sys/mman.h>
> >  #include <sys/resourcevar.h>
> >  
> > -#include <uvm/uvm_extern.h>
> > +#include <uvm/uvm_vnode.h>
> >  
> >  /*
> >   * new_vmcmd():
> > Index: kern/vfs_subr.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/vfs_subr.c,v
> > diff -u -p -r1.332 vfs_subr.c
> > --- kern/vfs_subr.c	25 Sep 2025 09:05:47 -0000	1.332
> > +++ kern/vfs_subr.c	26 Sep 2025 09:25:53 -0000
> > @@ -420,7 +420,6 @@ getnewvnode(enum vtagtype tag, struct mo
> >  	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
> >  		splx(s);
> >  		vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
> > -		uvm_vnp_obj_alloc(vp);
> >  		RBT_INIT(buf_rb_bufs, &vp->v_bufs_tree);
> >  		cache_tree_init(&vp->v_nc_tree);
> >  		TAILQ_INIT(&vp->v_cache_dst);
> > Index: uvm/uvm_extern.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> > diff -u -p -r1.184 uvm_extern.h
> > --- uvm/uvm_extern.h	3 Jun 2025 08:38:17 -0000	1.184
> > +++ uvm/uvm_extern.h	26 Sep 2025 09:12:03 -0000
> > @@ -443,9 +443,6 @@ void			kmeminit_nkmempages(void);
> >  void			kmeminit(void);
> >  extern u_int		nkmempages;
> >  
> > -struct vnode;
> > -struct uvm_object	*uvn_attach(struct vnode *, vm_prot_t);
> > -
> >  struct process;
> >  struct kinfo_vmentry;
> >  int			fill_vmmap(struct process *, struct kinfo_vmentry *,
> > Index: uvm/uvm_vnode.c
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> > diff -u -p -r1.141 uvm_vnode.c
> > --- uvm/uvm_vnode.c	25 Sep 2025 09:05:47 -0000	1.141
> > +++ uvm/uvm_vnode.c	26 Sep 2025 10:18:28 -0000
> > @@ -137,7 +137,7 @@ uvn_init(void)
> >  struct uvm_object *
> >  uvn_attach(struct vnode *vp, vm_prot_t accessprot)
> >  {
> > -	struct uvm_vnode *uvn = vp->v_uvm;
> > +	struct uvm_vnode *uvn;
> >  	struct vattr vattr;
> >  	int oldflags, result;
> >  	struct partinfo pi;
> > @@ -148,7 +148,18 @@ uvn_attach(struct vnode *vp, vm_prot_t a
> >  		return NULL;
> >  	}
> >  
> > -	/* first get a lock on the uvn. */
> > +	if (vp->v_uvm == NULL) {
> > +		uvn = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
> > +		KERNEL_ASSERT_LOCKED();
> > +		if (vp->v_uvm == NULL) {
> > +			uvm_obj_init(&uvn->u_obj, &uvm_vnodeops, 0);
> > +			uvn->u_vnode = vp;
> > +			vp->v_uvm = uvn;
> > +		} else
> > +			pool_put(&uvm_vnode_pool, uvn);
> > +	}
> > +
> > +	uvn = vp->v_uvm;
> >  	rw_enter(uvn->u_obj.vmobjlock, RW_WRITE);
> >  	while (uvn->u_flags & UVM_VNODE_BLOCKED) {
> >  		uvn->u_flags |= UVM_VNODE_WANTED;
> > @@ -423,10 +434,12 @@ void
> >  uvm_vnp_terminate(struct vnode *vp)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > +	struct uvm_object *uobj;
> >  	int oldflags;
> >  
> > -	/* check if it is valid */
> > +	if (uvn == NULL)
> > +		return;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> >  	if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
> >  		rw_exit(uobj->vmobjlock);
> > @@ -1357,13 +1370,14 @@ int
> >  uvm_vnp_uncache(struct vnode *vp)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > -
> > -	/* lock uvn part of the vnode and check if we need to do anything */
> > +	struct uvm_object *uobj;
> >  
> > +	if (uvn == NULL)
> > +		return TRUE;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> >  	if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
> > -			(uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
> > +	    (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
> >  		rw_exit(uobj->vmobjlock);
> >  		return TRUE;
> >  	}
> > @@ -1436,13 +1441,13 @@ void
> >  uvm_vnp_setsize(struct vnode *vp, off_t newsize)
> >  {
> >  	struct uvm_vnode *uvn = vp->v_uvm;
> > -	struct uvm_object *uobj = &uvn->u_obj;
> > +	struct uvm_object *uobj;
> >  
> >  	KERNEL_ASSERT_LOCKED();
> > -
> > +	if (uvn == NULL)
> > +		return;
> > +	uobj = &uvn->u_obj;
> >  	rw_enter(uobj->vmobjlock, RW_WRITE);
> > -
> > -	/* lock uvn and check for valid object, and if valid: do it! */
> >  	if (uvn->u_flags & UVM_VNODE_VALID) {
> >  
> >  		/*
> > @@ -1541,17 +1546,4 @@ uvm_vnp_sync(struct mount *mp)
> >  	}
> >  
> >  	rw_exit_write(&uvn_sync_lock);
> > -}
> > -
> > -void
> > -uvm_vnp_obj_alloc(struct vnode *vp)
> > -{
> > -	struct uvm_vnode *uvn;
> > -
> > -	KASSERT(vp->v_uvm == NULL);
> > -
> > -	uvn = pool_get(&uvm_vnode_pool, PR_WAITOK | PR_ZERO);
> > -	uvm_obj_init(&uvn->u_obj, &uvm_vnodeops, 0);
> > -	uvn->u_vnode = vp;
> > -	vp->v_uvm = uvn;
> >  }
> > Index: uvm/uvm_vnode.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_vnode.h,v
> > diff -u -p -r1.22 uvm_vnode.h
> > --- uvm/uvm_vnode.h	25 Sep 2025 09:05:47 -0000	1.22
> > +++ uvm/uvm_vnode.h	26 Sep 2025 09:25:00 -0000
> > @@ -94,7 +94,7 @@ struct uvm_vnode {
> >   */
> >  #define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL)
> >  
> > -void			uvm_vnp_obj_alloc(struct vnode *);
> > +struct uvm_object	*uvn_attach(struct vnode *, vm_prot_t);
> >  void			uvm_vnp_terminate(struct vnode *);
> >  
> >  #endif /* _UVM_UVM_VNODE_H_ */
> 
>