From: Martin Pieuchot
Subject: uvm_fault: logic for using shared lock
To: tech@openbsd.org
Date: Fri, 20 Dec 2024 20:16:20 +0100

Diff below introduces a `lower_lock_type' variable to the context
descriptor used in the fault handler.  This variable is currently
always set to RW_WRITE to not introduce any change in behavior.
Related assertions are also updated in this diff.

The rw_enter() in uvm_fault_lower_lookup() is the one which is
currently highly contended.  The next step will be to grab a RW_READ
lock for read faults.

ok?

Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.155 uvm_fault.c
--- uvm/uvm_fault.c	20 Dec 2024 18:46:51 -0000	1.155
+++ uvm/uvm_fault.c	20 Dec 2024 19:05:16 -0000
@@ -614,6 +614,7 @@ struct uvm_faultctx {
 	boolean_t wired;
 	paddr_t pa_flags;
 	boolean_t promote;
+	int lower_lock_type;
 };
 
 int		uvm_fault_check(
@@ -657,6 +658,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
 	flt.access_type = access_type;
 	flt.narrow = FALSE;		/* assume normal fault for now */
 	flt.wired = FALSE;		/* assume non-wired fault for now */
+	flt.lower_lock_type = RW_WRITE;	/* exclusive lock for now */
 
 	error = ERESTART;
 	while (error == ERESTART) { /* ReFault: */
@@ -1152,7 +1154,7 @@ uvm_fault_lower_lookup(
 	vaddr_t currva;
 	paddr_t pa;
 
-	rw_enter(uobj->vmobjlock, RW_WRITE);
+	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
 	counters_inc(uvmexp_counters, flt_lget);
 	gotpages = flt->npages;
 
@@ -1278,7 +1280,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	KASSERT(amap == NULL ||
 	    rw_write_held(amap->am_lock));
 	KASSERT(uobj == NULL ||
-	    rw_write_held(uobj->vmobjlock));
+	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
 
 	/*
 	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
@@ -1342,14 +1344,17 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		 * we are faulting directly on the page.
 		 */
 	} else {
+		KASSERT(amap != NULL);
+
+		/* promoting requires a write lock. */
+		KASSERT(rw_write_held(amap->am_lock));
+		KASSERT(uobj == NULL ||
+		    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
+
 		/*
 		 * if we are going to promote the data to an anon we
 		 * allocate a blank anon here and plug it into our amap.
 		 */
-#ifdef DIAGNOSTIC
-		if (amap == NULL)
-			panic("uvm_fault: want to promote data, but no anon");
-#endif
 		error = uvmfault_promote(ufi, uobjpage, &anon, &pg);
 		if (error)
 			return error;
@@ -1412,17 +1417,21 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		}
 	}
 
-	/* note: pg is either the uobjpage or the new page in the new anon */
+	/*
+	 * anon must be write locked (promotion).  uobj can be either.
+	 *
+	 * Note: pg is either the uobjpage or the new page in the new anon.
+	 */
+	KASSERT(amap == NULL ||
+	    rw_write_held(amap->am_lock));
+	KASSERT(uobj == NULL ||
+	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
+	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
+
 	/*
 	 * all resources are present.  we can now map it in and free our
 	 * resources.
 	 */
-	if (amap == NULL)
-		KASSERT(anon == NULL);
-	else {
-		KASSERT(rw_write_held(amap->am_lock));
-		KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
-	}
 	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
 	    VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
 	    flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
@@ -1547,7 +1556,9 @@ uvm_fault_lower_io(
 	/* might be changed */
 	if (pg != PGO_DONTCARE) {
 		uobj = pg->uobject;
-		rw_enter(uobj->vmobjlock, RW_WRITE);
+		rw_enter(uobj->vmobjlock, flt->lower_lock_type);
+		KASSERT((pg->pg_flags & PG_BUSY) != 0);
+		KASSERT(flt->lower_lock_type == RW_WRITE);
 	}
 
 	/*
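
For reference, the announced next step could look roughly like the
sketch below, placed where uvm_fault() initializes the new field.  The
condition for choosing the shared lock is an assumption made here for
illustration, not part of this diff:

	/*
	 * Sketch only: take the lower layer's lock shared for read
	 * faults, exclusive otherwise.
	 */
	if ((flt.access_type & PROT_WRITE) == 0)
		flt.lower_lock_type = RW_READ;	/* shared lock */
	else
		flt.lower_lock_type = RW_WRITE;	/* exclusive lock */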