From: Martin Pieuchot
Subject: Re: vm_upgrade
To: tech@openbsd.org
Date: Thu, 4 Sep 2025 17:44:05 +0200

On 02/09/25(Tue) 20:52, Martin Pieuchot wrote:
> On 15/08/25(Fri) 09:45, Martin Pieuchot wrote:
> > On 22/07/25(Tue) 16:54, Martin Pieuchot wrote:
> > > Instead of doing an unlock & relock dance before calling amap_copy()
> > > we can simply upgrade the lock.  This improves page fault performance
> > > by 1%.
> > >
> > > As explained in the other thread we can't do better for the moment.
> > >
> > > ok?
> >
> > Anyone?
>
> Updated diff including a fix for a read vs write unlock in the error
> path found by sthen@.

Updated diff after some input from mlarkin@:

- fix some KNF & spelling
- turn vm_map_downgrade() into a void function, as it always succeeds
- do not modify the map's timestamp in vm_map_downgrade()

ok?

Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.170 uvm_fault.c
--- uvm/uvm_fault.c	14 Jul 2025 08:45:16 -0000	1.170
+++ uvm/uvm_fault.c	4 Sep 2025 15:26:30 -0000
@@ -158,7 +158,6 @@ static struct uvm_advice uvmadvice[MADV_
 /*
  * private prototypes
  */
-static void uvmfault_amapcopy(struct uvm_faultinfo *);
 static inline void uvmfault_anonflush(struct vm_anon **, int);
 void	uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
 void	uvmfault_update_stats(struct uvm_faultinfo *);
@@ -219,49 +218,6 @@ uvmfault_init(void)
 }
 
 /*
- * uvmfault_amapcopy: clear "needs_copy" in a map.
- *
- * => called with VM data structures unlocked (usually, see below)
- * => we get a write lock on the maps and clear needs_copy for a VA
- * => if we are out of RAM we sleep (waiting for more)
- */
-static void
-uvmfault_amapcopy(struct uvm_faultinfo *ufi)
-{
-	for (;;) {
-		/*
-		 * no mapping? give up.
-		 */
-		if (uvmfault_lookup(ufi, TRUE) == FALSE)
-			return;
-
-		/*
-		 * copy if needed.
-		 */
-		if (UVM_ET_ISNEEDSCOPY(ufi->entry))
-			amap_copy(ufi->map, ufi->entry, M_NOWAIT,
-			    UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE,
-			    ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
-
-		/*
-		 * didn't work? must be out of RAM. unlock and sleep.
-		 */
-		if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
-			uvmfault_unlockmaps(ufi, TRUE);
-			uvm_wait("fltamapcopy");
-			continue;
-		}
-
-		/*
-		 * got it! unlock and return.
-		 */
-		uvmfault_unlockmaps(ufi, TRUE);
-		return;
-	}
-	/*NOTREACHED*/
-}
-
-/*
  * uvmfault_anonget: get data in an anon into a non-busy, non-released
  * page in that anon.
 *
@@ -734,14 +690,15 @@ uvm_fault_check(struct uvm_faultinfo *uf
 	struct vm_amap *amap;
 	struct uvm_object *uobj;
 	int nback, nforw;
+	boolean_t write_locked = FALSE;
 
 	/*
 	 * lookup and lock the maps
 	 */
-	if (uvmfault_lookup(ufi, FALSE) == FALSE) {
+lookup:
+	if (uvmfault_lookup(ufi, write_locked) == FALSE) {
 		return EFAULT;
 	}
-	/* locked: maps(read) */
 
 #ifdef DIAGNOSTIC
 	if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0)
@@ -753,7 +710,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
 	 * check protection
 	 */
	if ((ufi->entry->protection & flt->access_type) != flt->access_type) {
-		uvmfault_unlockmaps(ufi, FALSE);
+		uvmfault_unlockmaps(ufi, write_locked);
 		return EACCES;
 	}
 
@@ -779,11 +736,27 @@ uvm_fault_check(struct uvm_faultinfo *uf
 	if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
 		if ((flt->access_type & PROT_WRITE) ||
 		    (ufi->entry->object.uvm_obj == NULL)) {
-			/* need to clear */
-			uvmfault_unlockmaps(ufi, FALSE);
-			uvmfault_amapcopy(ufi);
+			/* modifying `ufi->entry' requires write lock */
+			if (!write_locked) {
+				write_locked = TRUE;
+				if (!vm_map_upgrade(ufi->map)) {
+					uvmfault_unlockmaps(ufi, FALSE);
+					goto lookup;
+				}
+			}
+
+			amap_copy(ufi->map, ufi->entry, M_NOWAIT,
+			    UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE,
+			    ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
+
+			/* didn't work? must be out of RAM. */
+			if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
+				uvmfault_unlockmaps(ufi, write_locked);
+				uvm_wait("fltamapcopy");
+				return ERESTART;
+			}
+
 			counters_inc(uvmexp_counters, flt_amcopy);
-			return ERESTART;
 		} else {
 			/*
 			 * ensure that we pmap_enter page R/O since
@@ -791,6 +764,11 @@ uvm_fault_check(struct uvm_faultinfo *uf
 			 */
 			flt->enter_prot &= ~PROT_WRITE;
 		}
+	}
+
+	if (write_locked) {
+		vm_map_downgrade(ufi->map);
+		write_locked = FALSE;
 	}
 
 	/*
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
diff -u -p -r1.345 uvm_map.c
--- uvm/uvm_map.c	3 Jun 2025 08:38:17 -0000	1.345
+++ uvm/uvm_map.c	4 Sep 2025 15:38:00 -0000
@@ -5223,6 +5225,51 @@ vm_map_unlock_read_ln(struct vm_map *map
 		rw_exit_read(&map->lock);
 	else
 		mtx_leave(&map->mtx);
+}
+
+boolean_t
+vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
+{
+	int rv;
+
+	if (map->flags & VM_MAP_INTRSAFE) {
+		MUTEX_ASSERT_LOCKED(&map->mtx);
+	} else {
+		struct proc *busy;
+
+		mtx_enter(&map->flags_lock);
+		busy = map->busy;
+		mtx_leave(&map->flags_lock);
+		if (busy != NULL && busy != curproc)
+			return FALSE;
+
+		rv = rw_enter(&map->lock, RW_UPGRADE|RW_NOSLEEP);
+		if (rv != 0)
+			return FALSE;
+	}
+
+	map->timestamp++;
+	LPRINTF(("map upgrade: %p (at %s %d)\n", map, file, line));
+	uvm_tree_sanity(map, file, line);
+	uvm_tree_size_chk(map, file, line);
+
+	return TRUE;
+}
+
+void
+vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
+{
+	int rv;
+
+	uvm_tree_sanity(map, file, line);
+	uvm_tree_size_chk(map, file, line);
+	LPRINTF(("map downgrade: %p (at %s %d)\n", map, file, line));
+	if (map->flags & VM_MAP_INTRSAFE) {
+		MUTEX_ASSERT_LOCKED(&map->mtx);
+	} else {
+		rv = rw_enter(&map->lock, RW_DOWNGRADE);
+		KASSERT(rv == 0);
+	}
 }
 
 void
Index: uvm/uvm_map.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.h,v
diff -u -p -r1.94 uvm_map.h
--- uvm/uvm_map.h	15 Nov 2024 02:59:23 -0000	1.94
+++ uvm/uvm_map.h	4 Sep 2025 15:36:38 -0000
@@ -402,6 +402,8 @@ void		vm_map_lock_ln(struct vm_map*, cha
 void		vm_map_lock_read_ln(struct vm_map*, char*, int);
 void		vm_map_unlock_ln(struct vm_map*, char*, int);
 void		vm_map_unlock_read_ln(struct vm_map*, char*, int);
+boolean_t	vm_map_upgrade_ln(struct vm_map*, char*, int);
+void		vm_map_downgrade_ln(struct vm_map*, char*, int);
 void		vm_map_busy_ln(struct vm_map*, char*, int);
 void		vm_map_unbusy_ln(struct vm_map*, char*, int);
 void		vm_map_assert_anylock_ln(struct vm_map*, char*, int);
@@ -413,6 +415,8 @@ void	vm_map_assert_wrlock_ln(struct vm_
 #define vm_map_lock_read(map)	vm_map_lock_read_ln(map, __FILE__, __LINE__)
 #define vm_map_unlock(map)	vm_map_unlock_ln(map, __FILE__, __LINE__)
 #define vm_map_unlock_read(map)	vm_map_unlock_read_ln(map, __FILE__, __LINE__)
+#define vm_map_upgrade(map)	vm_map_upgrade_ln(map, __FILE__, __LINE__)
+#define vm_map_downgrade(map)	vm_map_downgrade_ln(map, __FILE__, __LINE__)
 #define vm_map_busy(map)	vm_map_busy_ln(map, __FILE__, __LINE__)
 #define vm_map_unbusy(map)	vm_map_unbusy_ln(map, __FILE__, __LINE__)
 #define vm_map_assert_anylock(map)	\
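
For reviewers who want the locking protocol at a glance, the
uvm_fault_check() hunk boils down to the pattern below.  This is only
an illustrative sketch, not part of the diff; entry_needs_copy() and
modify_entry() are hypothetical stand-ins for the UVM_ET_ISNEEDSCOPY()
check and amap_copy().

	boolean_t write_locked = FALSE;

lookup:
	/*
	 * Shared lock in the common case; exclusive on retry after a
	 * failed upgrade (mirrors uvmfault_lookup(ufi, write_locked)).
	 */
	if (write_locked)
		vm_map_lock(map);
	else
		vm_map_lock_read(map);

	if (entry_needs_copy(map)) {
		if (!write_locked) {
			write_locked = TRUE;
			if (!vm_map_upgrade(map)) {
				/*
				 * Upgrade contended or map busy: drop the
				 * read lock and redo the lookup with an
				 * exclusive lock, as the map may change
				 * while unlocked.
				 */
				vm_map_unlock_read(map);
				goto lookup;
			}
		}
		modify_entry(map);
	}

	if (write_locked) {
		/* Back to shared; always succeeds, timestamp untouched. */
		vm_map_downgrade(map);
		write_locked = FALSE;
	}
	/* ... continue with the map read-locked ... */

The point of the non-sleeping upgrade is that, when it succeeds, no
other thread can have modified the map in between, so the entry found
under the read lock is still valid and the full unlock & relock dance
is avoided.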