Download raw body.
wired pages vs sysctl(2)
On Thu, Feb 27, 2025 at 02:44:13PM +0100, Martin Pieuchot wrote:
> Since the 90s sysctl(2) has been wiring pages that contain the syscall
> arguments.
>
> In uvm/uvm_fault.c r1.129 kettenis@ fixed one of the races exposed by
> syzkaller where a sibling thread munmap(2)ed the range currently wired
> by sysctl(2).
>
> Sadly the fix is incomplete and the reproducer attached triggers the
> following panic:
>
> panic: uvm_fault_unwire_locked: address not in map
> Stopped at db_enter+0x14: popq %rbp
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> *129832 2744 0 0x3 0 0 sysctl_wired
> 84116 96377 0 0x10 0x8 2 sshd-session
> 6897 50184 0 0x14000 0x200 1 softnet0
> db_enter() at db_enter+0x14
> panic(ffffffff8261d529) at panic+0xdd
> uvm_fault_unwire_locked(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire
> _locked+0x246
> uvm_fault_unwire(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire+0x44
> sysctl_vsunlock(20000040,0) at sysctl_vsunlock+0x46
> net_sysctl(ffff800055d411b4,3,20000040,ffff800055d411e8,20000080,c,cf641d03c835
> bbeb) at net_sysctl+0x309
> sys_sysctl(ffff800055c99c78,ffff800055d41300,ffff800055d41270) at sys_sysctl+0x
> 1f8
> syscall(ffff800055d41300) at syscall+0x5ec
> Xsyscall() at Xsyscall+0x128
>
> Syzkaller has two reports for this:
> https://syzkaller.appspot.com/bug?extid=d9b926edfd5f64a66c58
> https://syzkaller.appspot.com/bug?extid=a12540517e3a76a6a490
>
> It is not clear to my why vslock(9) is needed today.
We have sysctl that grab net lock around copyout(9). To avoid
potential NFS activity during copyout(9), wire the memory before
calling NET_LOCK(). Maybe there are more resons to call vslock(9).
> That said the diff
> below completes kettenis@'s approach by making vslock similar to mlock(2).
> With it, vsunlock() now silently fails without crashing the kernel and I
> can no longer reproduce the panic.
>
> ok?
Diff passes full regress on amd64 with witness. And I have a machine
based on 7.4 that runs some telegraf daemon and prints 'pmap_unwire:
wiring for pmap 0xfffffd8e45525378 va 0xc0014fc000 didnt change!'
regulary. With this patch the message disappered.
I am not an UVM expert, but for me it is OK bluhm@
> diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c
> index cf6ea625571..e0f2ee8e08e 100644
> --- sys/uvm/uvm_fault.c
> +++ sys/uvm/uvm_fault.c
> @@ -1782,16 +1782,12 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
> * find the map entry for the current address.
> */
> KASSERT(va >= entry->start);
> - while (entry && va >= entry->end) {
> + while (va >= entry->end) {
> next = RBT_NEXT(uvm_map_addr, entry);
> + KASSERT(next != NULL && next->start <= entry->end);
> entry = next;
> }
>
> - if (entry == NULL)
> - return;
> - if (va < entry->start)
> - continue;
> -
> /*
> * lock it.
> */
> @@ -1819,7 +1815,7 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
> }
>
> if (oentry != NULL) {
> - uvm_map_unlock_entry(oentry);
> + uvm_map_unlock_entry(entry);
> }
> }
>
> diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c
> index 0d7bd9ea10b..63c6f8393d6 100644
> --- sys/uvm/uvm_glue.c
> +++ sys/uvm/uvm_glue.c
> @@ -114,7 +114,7 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
> if (end <= start)
> return (EINVAL);
>
> - return uvm_fault_wire(map, start, end, access_type);
> + return uvm_map_pageable(map, start, end, FALSE, 0);
> }
>
> /*
> @@ -125,13 +125,14 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
> void
> uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
> {
> + struct vm_map *map = &p->p_vmspace->vm_map;
> vaddr_t start, end;
>
> start = trunc_page((vaddr_t)addr);
> end = round_page((vaddr_t)addr + len);
> KASSERT(end > start);
>
> - uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
> + uvm_map_pageable(map, start, end, TRUE, 0);
> }
>
> /*
> /* $OpenBSD$ */
>
> /*
> * MT race reproducer: unmap memory range while sysctl(2) marked it wired.
> */
>
> #include <netinet/in.h>
> #include <netinet/icmp6.h>
> #include <sys/mman.h>
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <sys/sysctl.h>
>
> #include <err.h>
> #include <pthread.h>
> #include <stdint.h>
> #include <stdio.h>
> #include <string.h>
> #include <unistd.h>
>
> static pthread_mutex_t gmtx = PTHREAD_MUTEX_INITIALIZER;
>
> size_t addr = 0x20000000UL;
> size_t len = 0x01000000UL;
>
> void *
> thr_unmap(void *arg)
> {
> pthread_mutex_lock(&gmtx);
>
> for (;;)
> munmap((void *)addr, 0x4000UL);
>
> return NULL;
> }
>
> int
> main(void)
> {
> pthread_t th;
> size_t *olenp;
> void *oldp, *newp;
> char *pg;
> int i, error, pgsz, *mib;
>
> pgsz = getpagesize();
>
> mmap((void *)addr, len, PROT_READ|PROT_WRITE,
> MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
>
> /* fault in all pages. */
> for (i = 0; i < len; i+= pgsz) {
> pg = (char *)(addr + i);
> pg[0] = 1;
> }
>
> mib = (int *)(addr + 0xc0);
> mib[0] = CTL_NET;
> mib[1] = AF_INET6;
> mib[2] = IPPROTO_ICMPV6;
> mib[3] = ICMPV6CTL_REDIRTIMEOUT;
>
> oldp = (void *)(addr + 0x40);
>
> olenp = (size_t *)(addr + 0x100);
> *olenp = 0;
>
> /* new arguments */
> newp = (char *)(addr + 0x80);
> memcpy(newp, "somegarbage", 12);
>
> pthread_mutex_lock(&gmtx);
> /* competing thread. */
> error = pthread_create(&th, NULL, thr_unmap, NULL);
> if (error)
> warn("pthread_create");
>
> /* Ensure memory is still mapped. */
> printf("%s\n", (char *)newp);
> pthread_mutex_unlock(&gmtx);
> for (i = 0; i < (100 * 100); i++) {
> error = sysctl(mib, 4, oldp, olenp, newp, 12);
> }
>
> return 0;
> }
wired pages vs sysctl(2)