Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
Re: uvm_fault: logic for using shared lock
To:
tech@openbsd.org
Date:
Mon, 23 Dec 2024 10:06:10 +0100

Download raw body.

Thread
On 20/12/24(Fri) 20:16, Martin Pieuchot wrote:
> Diff below introduces a `lower_lock_type' variable to the context
> descriptor used in the fault handler.  This variable is currently always
> set to RW_WRITE to not introduce any change in behavior.
> 
> Related assertions are also updated in this diff.
> 
> The rw_enter() in uvm_fault_lower_lookup() is the one which is currently
> highly contended.  The next step will be to grab a RW_READ lock for read
> faults.

Anyone?

> Index: uvm/uvm_fault.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> diff -u -p -r1.155 uvm_fault.c
> --- uvm/uvm_fault.c	20 Dec 2024 18:46:51 -0000	1.155
> +++ uvm/uvm_fault.c	20 Dec 2024 19:05:16 -0000
> @@ -614,6 +614,7 @@ struct uvm_faultctx {
>  	boolean_t wired;
>  	paddr_t pa_flags;
>  	boolean_t promote;
> +	int lower_lock_type;
>  };
>  
>  int		uvm_fault_check(
> @@ -657,6 +658,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>  	flt.access_type = access_type;
>  	flt.narrow = FALSE;		/* assume normal fault for now */
>  	flt.wired = FALSE;		/* assume non-wired fault for now */
> +	flt.lower_lock_type = RW_WRITE;	/* exclusive lock for now */
>  
>  	error = ERESTART;
>  	while (error == ERESTART) { /* ReFault: */
> @@ -1152,7 +1154,7 @@ uvm_fault_lower_lookup(
>  	vaddr_t currva;
>  	paddr_t pa;
>  
> -	rw_enter(uobj->vmobjlock, RW_WRITE);
> +	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
>  
>  	counters_inc(uvmexp_counters, flt_lget);
>  	gotpages = flt->npages;
> @@ -1278,7 +1280,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>  	KASSERT(amap == NULL ||
>  	    rw_write_held(amap->am_lock));
>  	KASSERT(uobj == NULL ||
> -	    rw_write_held(uobj->vmobjlock));
> +	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
>  
>  	/*
>  	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
> @@ -1342,14 +1344,17 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>  		 * we are faulting directly on the page.
>  		 */
>  	} else {
> +		KASSERT(amap != NULL);
> +
> +		/* promoting requires a write lock. */
> +	        KASSERT(rw_write_held(amap->am_lock));
> +	        KASSERT(uobj == NULL ||
> +	            rw_status(uobj->vmobjlock) == flt->lower_lock_type);
> +
>  		/*
>  		 * if we are going to promote the data to an anon we
>  		 * allocate a blank anon here and plug it into our amap.
>  		 */
> -#ifdef DIAGNOSTIC
> -		if (amap == NULL)
> -			panic("uvm_fault: want to promote data, but no anon");
> -#endif
>  		error = uvmfault_promote(ufi, uobjpage, &anon, &pg);
>  		if (error)
>  			return error;
> @@ -1412,17 +1417,21 @@ uvm_fault_lower(struct uvm_faultinfo *uf
>  		}
>  	}
>  
> -	/* note: pg is either the uobjpage or the new page in the new anon */
> +	/*
> +	 * anon must be write locked (promotion).  uobj can be either.
> +	 *
> +	 * Note: pg is either the uobjpage or the new page in the new anon.
> +	 */
> +	KASSERT(amap == NULL ||
> +	    rw_write_held(amap->am_lock));
> +	KASSERT(uobj == NULL ||
> +	    rw_status(uobj->vmobjlock) == flt->lower_lock_type);
> +	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
> +
>  	/*
>  	 * all resources are present.   we can now map it in and free our
>  	 * resources.
>  	 */
> -	if (amap == NULL)
> -		KASSERT(anon == NULL);
> -	else {
> -		KASSERT(rw_write_held(amap->am_lock));
> -		KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
> -	}
>  	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
>  	    VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
>  	    flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
> @@ -1547,7 +1556,9 @@ uvm_fault_lower_io(
>  	/* might be changed */
>  	if (pg != PGO_DONTCARE) {
>  		uobj = pg->uobject;
> -		rw_enter(uobj->vmobjlock, RW_WRITE);
> +		rw_enter(uobj->vmobjlock, flt->lower_lock_type);
> +		KASSERT((pg->pg_flags & PG_BUSY) != 0);
> +		KASSERT(flt->lower_lock_type == RW_WRITE);
>  	}
>  
>  	/*
> 
>