From: Martin Pieuchot Subject: Re: km_alloc(9), UAREA and kv_pageable To: Mark Kettenis Cc: tech@openbsd.org Date: Thu, 10 Oct 2024 10:40:02 +0200 On 09/10/24(Wed) 13:39, Mark Kettenis wrote: > > Date: Wed, 9 Oct 2024 13:16:25 +0200 > > From: Martin Pieuchot > > > > On 02/10/24(Wed) 17:28, Martin Pieuchot wrote: > > > It is currently impossible to use km_alloc(9) to get a managed mapping > > > backed with physical pages allocated up front. I guess this is why > > > uvm_km_kmemalloc_pla() is still used to allocate the UAREA. > > > > > > To fix that, I suggest the diff below which turns the "kp_pageable" flag > > > of km_alloc(9) into a "kp_managed". This new flag no longer implies > > > "kp_nomem" so I updated the description and fixed the remaining XXX to > > > preserve existing behavior of the `kp_pageable' global. > > > > > > With this change I believe we could also get rid of uvm_km_zalloc() used > > > in the i386 pmap. > > > > > > Comments? Oks? > > > > Anyone? > > I don't really understand why this needs to be "managed" memory. We > stopped paging out the uarea ages ago, and I wonder if all this > complexity is just from the time when we did. Can't this just be > kp_zero memory? On amd64 using kp_zero results in the kernel faulting in a loop from which it cannot recover. Could you please look at it and tell me if there is a bug somewhere else? > > > Index: sys/uvm/uvm_extern.h > > > =================================================================== > > > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v > > > diff -u -p -r1.177 uvm_extern.h > > > --- sys/uvm/uvm_extern.h 24 Aug 2024 10:46:43 -0000 1.177 > > > +++ sys/uvm/uvm_extern.h 2 Oct 2024 14:51:32 -0000 > > > @@ -328,7 +328,7 @@ struct kmem_va_mode { > > > * kp_nomem - don't allocate any backing pages. > > > * kp_maxseg - maximal amount of contiguous segments. > > > * kp_zero - zero the returned memory. > > > - * kp_pageable - allocate pageable memory. > > > + * kp_managed - map memory paged from `kp_object'. 
> > > */ > > > struct kmem_pa_mode { > > > struct uvm_constraint_range *kp_constraint; > > > @@ -338,7 +338,7 @@ struct kmem_pa_mode { > > > int kp_maxseg; > > > char kp_nomem; > > > char kp_zero; > > > - char kp_pageable; > > > + char kp_managed; > > > }; > > > > > > /* > > > Index: sys/uvm/uvm_glue.c > > > =================================================================== > > > RCS file: /cvs/src/sys/uvm/uvm_glue.c,v > > > diff -u -p -r1.84 uvm_glue.c > > > --- sys/uvm/uvm_glue.c 10 Sep 2022 20:35:29 -0000 1.84 > > > +++ sys/uvm/uvm_glue.c 2 Oct 2024 14:39:14 -0000 > > > @@ -259,20 +259,26 @@ uvm_vsunlock_device(struct proc *p, void > > > uvm_km_free(kernel_map, kva, sz); > > > } > > > > > > +const struct kmem_va_mode kv_uarea = { > > > + .kv_map = &kernel_map, > > > + .kv_align = USPACE_ALIGN > > > +}; > > > + > > > +const struct kmem_pa_mode kp_uarea = { > > > + .kp_constraint = &no_constraint, > > > + .kp_object = &uvm.kernel_object, > > > + .kp_zero = 1, > > > + .kp_managed = 1 > > > +}; > > > + > > > /* > > > * uvm_uarea_alloc: allocate the u-area for a new thread > > > */ > > > vaddr_t > > > uvm_uarea_alloc(void) > > > { > > > - vaddr_t uaddr; > > > - > > > - uaddr = uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object, USPACE, > > > - USPACE_ALIGN, UVM_KMF_ZERO, > > > - no_constraint.ucr_low, no_constraint.ucr_high, > > > - 0, 0, USPACE/PAGE_SIZE); > > > > > > - return (uaddr); > > > + return (vaddr_t)km_alloc(USPACE, &kv_uarea, &kp_uarea, &kd_waitok); > > > } > > > > > > /* > > > @@ -284,7 +290,7 @@ uvm_uarea_alloc(void) > > > void > > > uvm_uarea_free(struct proc *p) > > > { > > > - uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE); > > > + km_free(p->p_addr, USPACE, &kv_uarea, &kp_uarea); > > > p->p_addr = NULL; > > > } > > > > > > Index: sys/uvm/uvm_km.c > > > =================================================================== > > > RCS file: /cvs/src/sys/uvm/uvm_km.c,v > > > diff -u -p -r1.154 uvm_km.c > > > --- sys/uvm/uvm_km.c 24 Aug 2024 10:46:43 
-0000 1.154 > > > +++ sys/uvm/uvm_km.c 2 Oct 2024 14:49:00 -0000 > > > @@ -743,7 +743,7 @@ km_alloc(size_t sz, const struct kmem_va > > > > > > TAILQ_INIT(&pgl); > > > > > > - if (kp->kp_nomem || kp->kp_pageable) > > > + if (kp->kp_nomem) > > > goto alloc_va; > > > > > > pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT; > > > @@ -753,7 +753,8 @@ km_alloc(size_t sz, const struct kmem_va > > > > > > pla_align = kp->kp_align; > > > #ifdef __HAVE_PMAP_DIRECT > > > - if (pla_align < kv->kv_align) > > > + if ((kv->kv_singlepage || kp->kp_maxseg == 1) && > > > + (pla_align < kv->kv_align)) > > > pla_align = kv->kv_align; > > > #endif > > > pla_maxseg = kp->kp_maxseg; > > > @@ -762,7 +763,7 @@ km_alloc(size_t sz, const struct kmem_va > > > > > > if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low, > > > kp->kp_constraint->ucr_high, pla_align, kp->kp_boundary, > > > - &pgl, pla_maxseg, pla_flags)) { > > > + &pgl, pla_maxseg, pla_flags)) { > > > return (NULL); > > > } > > > > > > @@ -783,7 +784,7 @@ km_alloc(size_t sz, const struct kmem_va > > > alloc_va: > > > prot = PROT_READ | PROT_WRITE; > > > > > > - if (kp->kp_pageable) { > > > + if (kp->kp_managed) { > > > KASSERT(kp->kp_object); > > > KASSERT(!kv->kv_singlepage); > > > } else { > > > @@ -838,7 +839,7 @@ try_map: > > > } > > > sva = va; > > > TAILQ_FOREACH(pg, &pgl, pageq) { > > > - if (kp->kp_pageable) > > > + if (kp->kp_managed) > > > pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg), > > > prot, prot | PMAP_WIRED); > > > else > > > @@ -860,9 +861,6 @@ km_free(void *v, size_t sz, const struct > > > sva = (vaddr_t)v; > > > eva = sva + sz; > > > > > > - if (kp->kp_nomem) > > > - goto free_va; > > > - > > > #ifdef __HAVE_PMAP_DIRECT > > > if (kv->kv_singlepage || kp->kp_maxseg == 1) { > > > TAILQ_INIT(&pgl); > > > @@ -887,10 +885,10 @@ km_free(void *v, size_t sz, const struct > > > } > > > #endif > > > > > > - if (kp->kp_pageable) { > > > + if (kp->kp_managed) { > > > pmap_remove(pmap_kernel(), sva, eva); > > 
> pmap_update(pmap_kernel()); > > > - } else { > > > + } else if (!kp->kp_nomem) { > > > TAILQ_INIT(&pgl); > > > for (va = sva; va < eva; va += PAGE_SIZE) { > > > paddr_t pa; > > > @@ -908,7 +906,7 @@ km_free(void *v, size_t sz, const struct > > > pmap_update(pmap_kernel()); > > > uvm_pglistfree(&pgl); > > > } > > > -free_va: > > > + > > > uvm_unmap(*kv->kv_map, sva, eva); > > > if (kv->kv_wait) > > > wakeup(*kv->kv_map); > > > @@ -951,8 +949,8 @@ const struct kmem_pa_mode kp_zero = { > > > > > > const struct kmem_pa_mode kp_pageable = { > > > .kp_object = &uvm.kernel_object, > > > - .kp_pageable = 1 > > > -/* XXX - kp_nomem, maybe, but we'll need to fix km_free. */ > > > + .kp_managed = 1, > > > + .kp_nomem = 1 > > > }; > > > > > > const struct kmem_pa_mode kp_none = { > > > Index: share/man/man9/km_alloc.9 > > > =================================================================== > > > RCS file: /cvs/src/share/man/man9/km_alloc.9,v > > > diff -u -p -r1.9 km_alloc.9 > > > --- share/man/man9/km_alloc.9 6 Dec 2019 19:15:16 -0000 1.9 > > > +++ share/man/man9/km_alloc.9 2 Oct 2024 14:47:18 -0000 > > > @@ -126,7 +126,7 @@ struct kmem_pa_mode { > > > int kp_nomem; > > > int kp_maxseg; > > > int kp_zero; > > > - int kp_pageable; > > > + int kp_managed; > > > }; > > > .Ed > > > .Bl -tag -width kp_constraint > > > @@ -141,15 +141,14 @@ Physical alignment of the first page in > > > Boundary that the physical addresses can't cross if the allocation is > > > contiguous. > > > .It kp_nomem > > > -A flag that specifies that the allocation should not be backed by physical > > > -pages. > > > +A flag that specifies that no physical page should be allocated up front. > > > .It kp_maxseg > > > Maximal amount of contiguous physical segments in the allocation. > > > .It kp_zero > > > A flag that specifies if the returned memory should be zeroed. 
> > > -.It kp_pageable > > > -A flag that specifies if the returned memory should be demand paged from the > > > -backing object instead of being allocated up front. > > > +.It kp_managed > > > +A flag that specifies if the returned memory should be mapped paged from the > > > +backing object. > > > .El > > > .Bd -literal > > > struct kmem_dyn_mode { > > > > > > >