From: Martin Pieuchot <mpi@grenadille.net>
Subject: uvm_fault: logic for using shared lock
To: tech@openbsd.org
Date: Fri, 20 Dec 2024 20:16:20 +0100

The diff below adds a `lower_lock_type' variable to the context
descriptor used in the fault handler.  For now this variable is always
set to RW_WRITE, so no change in behavior is introduced.

The related assertions are also updated in this diff: instead of
hard-coding rw_write_held(9), they compare the result of rw_status(9)
against the expected lock type.
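
While `lower_lock_type' is RW_WRITE this is just as strict as before,
because rw_status(9) returns RW_WRITE only when the calling thread
holds the lock exclusively; once the type is relaxed, the very same
assertion will accept a shared lock.  A minimal sketch of the pattern,
using the names from the diff below:

	/*
	 * Assert the lower object's lock is held in whatever mode
	 * the fault context expects.
	 */
	KASSERT(uobj == NULL ||
	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);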

The rw_enter() in uvm_fault_lower_lookup() is the one that is
currently highly contended.  The next step will be to grab an RW_READ
lock there for read faults; see the sketch below.
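
To make that concrete, the follow-up could look roughly like the
snippet below.  This is only a sketch and not part of this diff; the
exact condition for taking the shared lock is hypothetical.

	/*
	 * Hypothetical next step: let read faults take the lower
	 * object's lock shared, while write faults keep taking it
	 * exclusively.
	 */
	if ((flt.access_type & PROT_WRITE) == 0)
		flt.lower_lock_type = RW_READ;
	else
		flt.lower_lock_type = RW_WRITE;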

ok?

Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.155 uvm_fault.c
--- uvm/uvm_fault.c	20 Dec 2024 18:46:51 -0000	1.155
+++ uvm/uvm_fault.c	20 Dec 2024 19:05:16 -0000
@@ -614,6 +614,7 @@ struct uvm_faultctx {
 	boolean_t wired;
 	paddr_t pa_flags;
 	boolean_t promote;
+	int lower_lock_type;
 };
 
 int		uvm_fault_check(
@@ -657,6 +658,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
 	flt.access_type = access_type;
 	flt.narrow = FALSE;		/* assume normal fault for now */
 	flt.wired = FALSE;		/* assume non-wired fault for now */
+	flt.lower_lock_type = RW_WRITE;	/* exclusive lock for now */
 
 	error = ERESTART;
 	while (error == ERESTART) { /* ReFault: */
@@ -1152,7 +1154,7 @@ uvm_fault_lower_lookup(
 	vaddr_t currva;
 	paddr_t pa;
 
-	rw_enter(uobj->vmobjlock, RW_WRITE);
+	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
 
 	counters_inc(uvmexp_counters, flt_lget);
 	gotpages = flt->npages;
@@ -1278,7 +1280,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	KASSERT(amap == NULL ||
 	    rw_write_held(amap->am_lock));
 	KASSERT(uobj == NULL ||
-	    rw_write_held(uobj->vmobjlock));
+	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
 
 	/*
 	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
@@ -1342,14 +1344,17 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		 * we are faulting directly on the page.
 		 */
 	} else {
+		KASSERT(amap != NULL);
+
+		/* promoting requires a write lock. */
+		KASSERT(rw_write_held(amap->am_lock));
+		KASSERT(uobj == NULL ||
+		    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
+
 		/*
 		 * if we are going to promote the data to an anon we
 		 * allocate a blank anon here and plug it into our amap.
 		 */
-#ifdef DIAGNOSTIC
-		if (amap == NULL)
-			panic("uvm_fault: want to promote data, but no anon");
-#endif
 		error = uvmfault_promote(ufi, uobjpage, &anon, &pg);
 		if (error)
 			return error;
@@ -1412,17 +1417,21 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		}
 	}
 
-	/* note: pg is either the uobjpage or the new page in the new anon */
+	/*
+	 * anon must be write locked (promotion).  uobj can be either.
+	 *
+	 * Note: pg is either the uobjpage or the new page in the new anon.
+	 */
+	KASSERT(amap == NULL ||
+	    rw_write_held(amap->am_lock));
+	KASSERT(uobj == NULL ||
+	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
+	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
+
 	/*
 	 * all resources are present.   we can now map it in and free our
 	 * resources.
 	 */
-	if (amap == NULL)
-		KASSERT(anon == NULL);
-	else {
-		KASSERT(rw_write_held(amap->am_lock));
-		KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
-	}
 	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
 	    VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
 	    flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
@@ -1547,7 +1556,9 @@ uvm_fault_lower_io(
 	/* might be changed */
 	if (pg != PGO_DONTCARE) {
 		uobj = pg->uobject;
-		rw_enter(uobj->vmobjlock, RW_WRITE);
+		rw_enter(uobj->vmobjlock, flt->lower_lock_type);
+		KASSERT((pg->pg_flags & PG_BUSY) != 0);
+		KASSERT(flt->lower_lock_type == RW_WRITE);
 	}
 
 	/*