Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: pgo_fault & VM_PAGER_* values
To:
Martin Pieuchot <mpi@grenadille.net>
Cc:
tb@openbsd.org, tech@openbsd.org, jsg@openbsd.org
Date:
Mon, 09 Dec 2024 12:21:25 +0100

Download raw body.

Thread
> Date: Mon, 9 Dec 2024 11:38:53 +0100
> From: Martin Pieuchot <mpi@grenadille.net>

Not looked at the complete diff in detail yet, but I spotted something
that raised a question about the approach taken:

You're mapping both VM_PAGER_BAD and VM_PAGER_ERROR to EACCES here.
But those really are different things and the difference should really
be visible in userland.  When I wrote this code, my expectation was
that return VM_PAGER_ERROR would result in uvm_fault() returning EIO,
which would then be translated into a SIGBUS.  Now, looking at
uvm_fault.c it seems we currently do return EACCES in that case.
Maybe that was always wrong or maybe this got lost in some kind of
refactoring.

Now you probably don't want to introduce a change of behaviour with
this diff.  But maybe it is a good idea to mark those VM_PAGER_ERROR
cases that now become EACCES with /* XXX */ such that we can revisit
them later.  That's what you did with the -ENOMEM case in
i915_error_to_vmf_fault(), which we may indeed want to propagate up as
ENOMEM instead of EACCES as well.  But that needs a bit of thought.

Also, returning errno values instead of the VM_PAGER_XXX constants
means that it becomes less obvious what error values can actually be
returned.  We probably should document the accepted error values
somewhere.  Maybe we should add those to the uvm_fault(9) page?

Cheers,

Mark

> On 05/12/24(Thu) 00:22, Theo Buehler wrote:
> > On Wed, Dec 04, 2024 at 10:52:59AM +0100, Martin Pieuchot wrote:
> > > Similar to the previous change in uvmfault_anonget(), the diff below
> > > gets rid of the VM_PAGER_* values and use errno values instead in all
> > > the pgo_fault() handlers.
> > > 
> > > The goal of this change is to reduce the differences with NetBSD's UVM
> > > and then to return different error codes.
> > > 
> > > I've annotated return values that are currently EACCES but could be
> > > something else.
> > > 
> > > While here grab the KERNEL_LOCK() after the UVM object lock to reduce
> > > contention.
> > 
> > This generally looks good to me - for a while I thought the diff was
> > incomplete since I looked at the wrong version of some functions under
> > #ifdef __linux__
> > 
> > I dislike that in a few handlers the vm_fault_t ret changes from holding
> > a VM_* code to a type holding an errno. But that's C being the terrible
> > type unsafe language that it is.
> > 
> > In udv_fault() there is a retval = VM_PAGER_OK that should be retval = 0.
> > 
> > In ttm_bo_vm_fault() you deduplicated the return value handling of
> > ttm_bo_vm_fault() with the exit path, which you didn't do in the other
> > functions. I would either undo that or do it for all the functions, e.g.,
> > in amdgpu_gem_fault() do
> > 
> > unlock:
> > 	dma_resv_unlock(bo->base.resv);
> > out:
> > 	switch (ret) {
> > 	case VM_FAULT_NOPAGE:
> > ...
> 
> Updated diff below that addresses both issues.
> 
> ok?
> 
> Index: dev/pci/drm/drm_gem.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_gem.c,v
> diff -u -p -r1.23 drm_gem.c
> --- dev/pci/drm/drm_gem.c	16 Jan 2024 23:37:51 -0000	1.23
> +++ dev/pci/drm/drm_gem.c	9 Dec 2024 09:56:24 -0000
> @@ -102,10 +102,9 @@ drm_fault(struct uvm_faultinfo *ufi, vad
>  	 * we do not allow device mappings to be mapped copy-on-write
>  	 * so we kill any attempt to do so here.
>  	 */
> -	
>  	if (UVM_ET_ISCOPYONWRITE(entry)) {
>  		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
> -		return(VM_PAGER_ERROR);
> +		return EACCES;
>  	}
>  
>  	/*
> @@ -125,7 +124,7 @@ drm_fault(struct uvm_faultinfo *ufi, vad
>  			    PZERO, "drmflt", INFSLP);
>  		}
>  		mtx_leave(&dev->quiesce_mtx);
> -		return(VM_PAGER_REFAULT);
> +		return ERESTART;
>  	}
>  	dev->quiesce_count++;
>  	mtx_leave(&dev->quiesce_mtx);
> @@ -141,7 +140,7 @@ drm_fault(struct uvm_faultinfo *ufi, vad
>  		wakeup(&dev->quiesce_count);
>  	mtx_leave(&dev->quiesce_mtx);
>  
> -	return (ret);
> +	return ret;
>  }
>  
>  boolean_t	
> Index: dev/pci/drm/drm_gem_dma_helper.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/drm_gem_dma_helper.c,v
> diff -u -p -r1.4 drm_gem_dma_helper.c
> --- dev/pci/drm/drm_gem_dma_helper.c	10 Nov 2024 06:51:59 -0000	1.4
> +++ dev/pci/drm/drm_gem_dma_helper.c	9 Dec 2024 09:56:24 -0000
> @@ -208,7 +208,7 @@ drm_gem_dma_fault(struct drm_gem_object 
>  		paddr = bus_dmamem_mmap(obj->dmat, obj->dmasegs, 1,
>  		    offset, access_type, BUS_DMA_NOCACHE);
>  		if (paddr == -1) {
> -			retval = VM_PAGER_BAD;
> +			retval = EACCES;
>  			break;
>  		}
>  
> @@ -218,7 +218,7 @@ drm_gem_dma_fault(struct drm_gem_object 
>  			uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
>  			    uobj);
>  			uvm_wait("drm_gem_dma_fault");
> -			return VM_PAGER_REFAULT;
> +			return ERESTART;
>  		}
>  	}
>  
> Index: dev/pci/drm/amd/amdgpu/amdgpu_gem.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_gem.c,v
> diff -u -p -r1.10 amdgpu_gem.c
> --- dev/pci/drm/amd/amdgpu/amdgpu_gem.c	16 Jan 2024 23:37:52 -0000	1.10
> +++ dev/pci/drm/amd/amdgpu/amdgpu_gem.c	9 Dec 2024 09:59:49 -0000
> @@ -99,19 +99,7 @@ amdgpu_gem_fault(struct uvm_faultinfo *u
>  
>  	ret = ttm_bo_vm_reserve(bo);
>  	if (ret) {
> -		switch (ret) {
> -		case VM_FAULT_NOPAGE:
> -			ret = VM_PAGER_OK;
> -			break;
> -		case VM_FAULT_RETRY:
> -			ret = VM_PAGER_REFAULT;
> -			break;
> -		default:
> -			ret = VM_PAGER_BAD;
> -			break;
> -		}
> -		uvmfault_unlockall(ufi, NULL, uobj);
> -		return ret;
> +		goto out;
>  	}
>  
>  	if (drm_dev_enter(ddev, &idx)) {
> @@ -137,18 +125,19 @@ amdgpu_gem_fault(struct uvm_faultinfo *u
>  #endif
>  
>  unlock:
> +	dma_resv_unlock(bo->base.resv);
> +out:
>  	switch (ret) {
>  	case VM_FAULT_NOPAGE:
> -		ret = VM_PAGER_OK;
> +		ret = 0;
>  		break;
>  	case VM_FAULT_RETRY:
> -		ret = VM_PAGER_REFAULT;
> +		ret = ERESTART;
>  		break;
>  	default:
> -		ret = VM_PAGER_BAD;
> +		ret = EACCES;
>  		break;
>  	}
> -	dma_resv_unlock(bo->base.resv);
>  	uvmfault_unlockall(ufi, NULL, uobj);
>  	return ret;
>  }
> Index: dev/pci/drm/i915/gem/i915_gem_mman.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_mman.c,v
> diff -u -p -r1.20 i915_gem_mman.c
> --- dev/pci/drm/i915/gem/i915_gem_mman.c	19 Aug 2024 11:18:29 -0000	1.20
> +++ dev/pci/drm/i915/gem/i915_gem_mman.c	9 Dec 2024 09:56:24 -0000
> @@ -591,10 +591,10 @@ static int i915_error_to_vmf_fault(int e
>  	case -EFAULT: /* purged object */
>  	case -ENODEV: /* bad object, how did you get here! */
>  	case -ENXIO: /* unable to access backing store (on device) */
> -		return VM_PAGER_ERROR;
> +		return EACCES;
>  
>  	case -ENOMEM: /* our allocation failure */
> -		return VM_PAGER_ERROR;
> +		return EACCES; /* XXX */
>  
>  	case 0:
>  	case -EAGAIN:
> @@ -607,7 +607,7 @@ static int i915_error_to_vmf_fault(int e
>  		 * EBUSY is ok: this just means that another thread
>  		 * already did the job.
>  		 */
> -		return VM_PAGER_OK;
> +		return 0;
>  	}
>  }
>  
> @@ -629,11 +629,11 @@ vm_fault_cpu(struct i915_mmap_offset *mm
>  	/* Sanity check that we allow writing into this object */
>  	if (unlikely(i915_gem_object_is_readonly(obj) && write)) {
>  		uvmfault_unlockall(ufi, NULL, &obj->base.uobj);
> -		return VM_PAGER_BAD;
> +		return EACCES;
>  	}
>  
>  	if (i915_gem_object_lock_interruptible(obj, NULL))
> -		return VM_PAGER_ERROR;
> +		return EACCES;
>  
>  	err = i915_gem_object_pin_pages(obj);
>  	if (err)
> @@ -921,7 +921,7 @@ i915_gem_fault(struct drm_gem_object *ge
>  	drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
>  	if (!mmo) {
>  		uvmfault_unlockall(ufi, NULL, &gem_obj->uobj);
> -		return VM_PAGER_BAD;
> +		return EACCES;
>  	}
>  
>  	KASSERT(gem_obj == &mmo->obj->base);
> Index: dev/pci/drm/i915/gem/i915_gem_ttm.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/i915/gem/i915_gem_ttm.c,v
> diff -u -p -r1.8 i915_gem_ttm.c
> --- dev/pci/drm/i915/gem/i915_gem_ttm.c	11 Oct 2024 02:48:48 -0000	1.8
> +++ dev/pci/drm/i915/gem/i915_gem_ttm.c	9 Dec 2024 09:56:24 -0000
> @@ -1240,20 +1240,20 @@ vm_fault_ttm(struct uvm_faultinfo *ufi, 
>  	/* Sanity check that we allow writing into this object */
>  	if (unlikely(i915_gem_object_is_readonly(obj) && write)) {
>  		uvmfault_unlockall(ufi, NULL, &obj->base.uobj);
> -		return VM_PAGER_BAD;
> +		return EACCES;
>  	}
>  
>  	ret = ttm_bo_vm_reserve(bo);
>  	if (ret) {
>  		switch (ret) {
>  		case VM_FAULT_NOPAGE:
> -			ret = VM_PAGER_OK;
> +			ret = 0;
>  			break;
>  		case VM_FAULT_RETRY:
> -			ret = VM_PAGER_REFAULT;
> +			ret = ERESTART;
>  			break;
>  		default:
> -			ret = VM_PAGER_BAD;
> +			ret = EACCES;
>  			break;
>  		}
>  		uvmfault_unlockall(ufi, NULL, &obj->base.uobj);
> @@ -1263,7 +1263,7 @@ vm_fault_ttm(struct uvm_faultinfo *ufi, 
>  	if (obj->mm.madv != I915_MADV_WILLNEED) {
>  		dma_resv_unlock(bo->base.resv);
>  		uvmfault_unlockall(ufi, NULL, &obj->base.uobj);
> -		return VM_PAGER_BAD;
> +		return EACCES;
>  	}
>  
>  	/*
> @@ -1285,7 +1285,7 @@ vm_fault_ttm(struct uvm_faultinfo *ufi, 
>  		if (err) {
>  			dma_resv_unlock(bo->base.resv);
>  			uvmfault_unlockall(ufi, NULL, &obj->base.uobj);
> -			return VM_PAGER_BAD;
> +			return EACCES;
>  		}
>  	} else if (!i915_ttm_resource_mappable(bo->resource)) {
>  		int err = -ENODEV;
> @@ -1359,13 +1359,13 @@ vm_fault_ttm(struct uvm_faultinfo *ufi, 
>  out_rpm:
>  	switch (ret) {
>  	case VM_FAULT_NOPAGE:
> -		ret = VM_PAGER_OK;
> +		ret = 0;
>  		break;
>  	case VM_FAULT_RETRY:
> -		ret = VM_PAGER_REFAULT;
> +		ret = ERESTART;
>  		break;
>  	default:
> -		ret = VM_PAGER_BAD;
> +		ret = EACCES;
>  		break;
>  	}
>  
> Index: dev/pci/drm/radeon/radeon_gem.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/radeon/radeon_gem.c,v
> diff -u -p -r1.19 radeon_gem.c
> --- dev/pci/drm/radeon/radeon_gem.c	26 Jul 2024 03:42:02 -0000	1.19
> +++ dev/pci/drm/radeon/radeon_gem.c	9 Dec 2024 09:56:24 -0000
> @@ -115,13 +115,13 @@ unlock_resv:
>  unlock_mclk:
>  	switch (ret) {
>  	case VM_FAULT_NOPAGE:
> -		ret = VM_PAGER_OK;
> +		ret = 0;
>  		break;
>  	case VM_FAULT_RETRY:
> -		ret = VM_PAGER_REFAULT;
> +		ret = ERESTART;
>  		break;
>  	default:
> -		ret = VM_PAGER_BAD;
> +		ret = EACCES;
>  		break;
>  	}
>  	up_read(&rdev->pm.mclk_lock);
> Index: dev/pci/drm/ttm/ttm_bo_vm.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/drm/ttm/ttm_bo_vm.c,v
> diff -u -p -r1.32 ttm_bo_vm.c
> --- dev/pci/drm/ttm/ttm_bo_vm.c	16 Jan 2024 23:38:14 -0000	1.32
> +++ dev/pci/drm/ttm/ttm_bo_vm.c	9 Dec 2024 09:56:24 -0000
> @@ -598,37 +598,23 @@ ttm_bo_vm_fault(struct uvm_faultinfo *uf
>  
>  	ret = ttm_bo_vm_reserve(bo);
>  	if (ret) {
> -		switch (ret) {
> -		case VM_FAULT_NOPAGE:
> -			ret = VM_PAGER_OK;
> -			break;
> -		case VM_FAULT_RETRY:
> -			ret = VM_PAGER_REFAULT;
> -			break;
> -		default:
> -			ret = VM_PAGER_BAD;
> -			break;
> -		}
> -
> -		uvmfault_unlockall(ufi, NULL, uobj);
> -		return ret;
> +		goto out;
>  	}
>  
>  	ret = ttm_bo_vm_fault_reserved(ufi, vaddr, TTM_BO_VM_NUM_PREFAULT, 1);
> +	dma_resv_unlock(bo->base.resv);
> +out:
>  	switch (ret) {
>  	case VM_FAULT_NOPAGE:
> -		ret = VM_PAGER_OK;
> +		ret = 0;
>  		break;
>  	case VM_FAULT_RETRY:
> -		ret = VM_PAGER_REFAULT;
> +		ret = ERESTART;
>  		break;
>  	default:
> -		ret = VM_PAGER_BAD;
> +		ret = EACCES;
>  		break;
>  	}
> -
> -	dma_resv_unlock(bo->base.resv);
> -
>  	uvmfault_unlockall(ufi, NULL, uobj);
>  	return ret;
>  }
> Index: uvm/uvm_device.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_device.c,v
> diff -u -p -r1.67 uvm_device.c
> --- uvm/uvm_device.c	24 Jul 2024 12:15:55 -0000	1.67
> +++ uvm/uvm_device.c	9 Dec 2024 09:57:12 -0000
> @@ -331,7 +331,7 @@ udv_fault(struct uvm_faultinfo *ufi, vad
>  	 */
>  	if (UVM_ET_ISCOPYONWRITE(entry)) {
>  		uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
> -		return(VM_PAGER_ERROR);
> +		return EACCES;
>  	}
>  
>  	/*
> @@ -354,7 +354,7 @@ udv_fault(struct uvm_faultinfo *ufi, vad
>  	/*
>  	 * loop over the page range entering in as needed
>  	 */
> -	retval = VM_PAGER_OK;
> +	retval = 0;
>  	for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
>  	    curr_va += PAGE_SIZE) {
>  		if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
> @@ -365,7 +365,7 @@ udv_fault(struct uvm_faultinfo *ufi, vad
>  
>  		paddr = (*mapfn)(device, curr_offset, access_type);
>  		if (paddr == -1) {
> -			retval = VM_PAGER_ERROR;
> +			retval = EACCES; /* XXX */
>  			break;
>  		}
>  		mapprot = ufi->entry->protection;
> @@ -387,11 +387,11 @@ udv_fault(struct uvm_faultinfo *ufi, vad
>  			/* sync what we have so far */
>  			pmap_update(ufi->orig_map->pmap);      
>  			uvm_wait("udv_fault");
> -			return (VM_PAGER_REFAULT);
> +			return ERESTART;
>  		}
>  	}
>  
>  	uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj);
>  	pmap_update(ufi->orig_map->pmap);
> -	return (retval);
> +	return retval;
>  }
> Index: uvm/uvm_fault.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> diff -u -p -r1.152 uvm_fault.c
> --- uvm/uvm_fault.c	4 Dec 2024 09:21:06 -0000	1.152
> +++ uvm/uvm_fault.c	9 Dec 2024 09:56:24 -0000
> @@ -621,20 +621,13 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>  			 * providing a pgo_fault routine.
>  			 */
>  			if (uobj != NULL && uobj->pgops->pgo_fault != NULL) {
> -				KERNEL_LOCK();
>  				rw_enter(uobj->vmobjlock, RW_WRITE);
> +				KERNEL_LOCK();
>  				error = uobj->pgops->pgo_fault(&ufi,
>  				    flt.startva, pages, flt.npages,
>  				    flt.centeridx, fault_type, flt.access_type,
>  				    PGO_LOCKED);
>  				KERNEL_UNLOCK();
> -
> -				if (error == VM_PAGER_OK)
> -					error = 0;
> -				else if (error == VM_PAGER_REFAULT)
> -					error = ERESTART;
> -				else
> -					error = EACCES;
>  			} else {
>  				/* case 2: fault on backing obj or zero fill */
>  				error = uvm_fault_lower(&ufi, &flt, pages);
> 
> 
>