From: Martin Pieuchot Subject: wired pages vs sysctl(2) To: tech@openbsd.org Cc: kettenis@openbsd.org Date: Thu, 27 Feb 2025 14:44:13 +0100 Since the 90s sysctl(2) has been wiring pages that contain the syscall arguments. In uvm/uvm_fault.c r1.129 kettenis@ fixed one of the races exposed by syzkaller where a sibling thread munmap(2)ed the range currently wired by sysctl(2). Sadly the fix is incomplete and the reproducer attached triggers the following panic: panic: uvm_fault_unwire_locked: address not in map Stopped at db_enter+0x14: popq %rbp TID PID UID PRFLAGS PFLAGS CPU COMMAND *129832 2744 0 0x3 0 0 sysctl_wired 84116 96377 0 0x10 0x8 2 sshd-session 6897 50184 0 0x14000 0x200 1 softnet0 db_enter() at db_enter+0x14 panic(ffffffff8261d529) at panic+0xdd uvm_fault_unwire_locked(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire _locked+0x246 uvm_fault_unwire(fffffd878156ab58,20000000,20001000) at uvm_fault_unwire+0x44 sysctl_vsunlock(20000040,0) at sysctl_vsunlock+0x46 net_sysctl(ffff800055d411b4,3,20000040,ffff800055d411e8,20000080,c,cf641d03c835 bbeb) at net_sysctl+0x309 sys_sysctl(ffff800055c99c78,ffff800055d41300,ffff800055d41270) at sys_sysctl+0x 1f8 syscall(ffff800055d41300) at syscall+0x5ec Xsyscall() at Xsyscall+0x128 Syzkaller has two reports for this: https://syzkaller.appspot.com/bug?extid=d9b926edfd5f64a66c58 https://syzkaller.appspot.com/bug?extid=a12540517e3a76a6a490 It is not clear to my why vslock(9) is needed today. That said the diff below completes kettenis@'s approach by making vslock similar to mlock(2). With it, vsunlock() now silently fails without crashing the kernel and I can no longer reproduce the panic. ok? diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c index cf6ea625571..e0f2ee8e08e 100644 --- sys/uvm/uvm_fault.c +++ sys/uvm/uvm_fault.c @@ -1782,16 +1782,12 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) * find the map entry for the current address. */ KASSERT(va >= entry->start); - while (entry && va >= entry->end) { + while (va >= entry->end) { next = RBT_NEXT(uvm_map_addr, entry); + KASSERT(next != NULL && next->start <= entry->end); entry = next; } - if (entry == NULL) - return; - if (va < entry->start) - continue; - /* * lock it. */ @@ -1819,7 +1815,7 @@ uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end) } if (oentry != NULL) { - uvm_map_unlock_entry(oentry); + uvm_map_unlock_entry(entry); } } diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c index 0d7bd9ea10b..63c6f8393d6 100644 --- sys/uvm/uvm_glue.c +++ sys/uvm/uvm_glue.c @@ -114,7 +114,7 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type) if (end <= start) return (EINVAL); - return uvm_fault_wire(map, start, end, access_type); + return uvm_map_pageable(map, start, end, FALSE, 0); } /* @@ -125,13 +125,14 @@ uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type) void uvm_vsunlock(struct proc *p, caddr_t addr, size_t len) { + struct vm_map *map = &p->p_vmspace->vm_map; vaddr_t start, end; start = trunc_page((vaddr_t)addr); end = round_page((vaddr_t)addr + len); KASSERT(end > start); - uvm_fault_unwire(&p->p_vmspace->vm_map, start, end); + uvm_map_pageable(map, start, end, TRUE, 0); } /* /* $OpenBSD$ */ /* * MT race reproducer: unmap memory range while sysctl(2) marked it wired. */ #include #include #include #include #include #include #include #include #include #include #include #include static pthread_mutex_t gmtx = PTHREAD_MUTEX_INITIALIZER; size_t addr = 0x20000000UL; size_t len = 0x01000000UL; void * thr_unmap(void *arg) { pthread_mutex_lock(&gmtx); for (;;) munmap((void *)addr, 0x4000UL); return NULL; } int main(void) { pthread_t th; size_t *olenp; void *oldp, *newp; char *pg; int i, error, pgsz, *mib; pgsz = getpagesize(); mmap((void *)addr, len, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0); /* fault in all pages. */ for (i = 0; i < len; i+= pgsz) { pg = (char *)(addr + i); pg[0] = 1; } mib = (int *)(addr + 0xc0); mib[0] = CTL_NET; mib[1] = AF_INET6; mib[2] = IPPROTO_ICMPV6; mib[3] = ICMPV6CTL_REDIRTIMEOUT; oldp = (void *)(addr + 0x40); olenp = (size_t *)(addr + 0x100); *olenp = 0; /* new arguments */ newp = (char *)(addr + 0x80); memcpy(newp, "somegarbage", 12); pthread_mutex_lock(&gmtx); /* competing thread. */ error = pthread_create(&th, NULL, thr_unmap, NULL); if (error) warn("pthread_create"); /* Ensure memory is still mapped. */ printf("%s\n", (char *)newp); pthread_mutex_unlock(&gmtx); for (i = 0; i < (100 * 100); i++) { error = sysctl(mib, 4, oldp, olenp, newp, 12); } return 0; }